From c43ea53357a5f6dcc5bcc74d5c7af72026919c85 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Wed, 8 Apr 2026 15:28:48 -0500 Subject: [PATCH 01/40] First pass at upgrade to Django 5.2 --- compose/local/django/Dockerfile | 22 +- compose/production/django/Dockerfile | 17 +- config/settings/base.py | 3 +- config/settings/local.py | 2 +- config/urls.py | 11 +- documentcloud/addons/choices.py | 23 +- documentcloud/core/choices.py | 228 ++++--- documentcloud/core/filters.py | 6 +- documentcloud/documents/choices.py | 107 +-- documentcloud/documents/fields.py | 10 +- documentcloud/documents/models/document.py | 4 +- .../documents/processing/sidekick/__init__.py | 0 .../documents/processing/sidekick/common | 1 - .../documents/processing/sidekick/main.py | 184 ------ .../processing/sidekick/requirements.txt | 9 - documentcloud/documents/views.py | 2 +- documentcloud/entities/choices.py | 17 +- ...20_remove_organization_members_and_more.py | 25 + ...on_members_organization_parent_and_more.py | 47 ++ documentcloud/projects/choices.py | 21 +- documentcloud/sidekick/__init__.py | 0 documentcloud/sidekick/apps.py | 12 - documentcloud/sidekick/choices.py | 11 - documentcloud/sidekick/lego.py | 139 ---- documentcloud/sidekick/local_tasks.py | 10 - .../sidekick/migrations/0001_initial.py | 25 - .../migrations/0002_auto_20210723_2029.py | 22 - documentcloud/sidekick/migrations/__init__.py | 0 documentcloud/sidekick/models.py | 45 -- documentcloud/sidekick/routers.py | 34 - documentcloud/sidekick/rules.py | 24 - documentcloud/sidekick/serializers.py | 29 - documentcloud/sidekick/sidekick.py | 0 documentcloud/sidekick/signals.py | 18 - documentcloud/sidekick/tasks.py | 164 ----- documentcloud/sidekick/tests/__init__.py | 0 documentcloud/sidekick/tests/test_views.py | 114 ---- documentcloud/sidekick/views.py | 82 --- requirements/base.in | 41 +- requirements/base.txt | 206 +++--- requirements/local.in | 6 +- 
requirements/local.txt | 614 +++++++++--------- requirements/production.in | 1 + requirements/production.txt | 508 +++++++-------- 44 files changed, 1000 insertions(+), 1844 deletions(-) delete mode 100644 documentcloud/documents/processing/sidekick/__init__.py delete mode 120000 documentcloud/documents/processing/sidekick/common delete mode 100644 documentcloud/documents/processing/sidekick/main.py delete mode 100644 documentcloud/documents/processing/sidekick/requirements.txt create mode 100644 documentcloud/organizations/migrations/0020_remove_organization_members_and_more.py create mode 100644 documentcloud/organizations/migrations/0021_organization_members_organization_parent_and_more.py delete mode 100644 documentcloud/sidekick/__init__.py delete mode 100644 documentcloud/sidekick/apps.py delete mode 100644 documentcloud/sidekick/choices.py delete mode 100644 documentcloud/sidekick/lego.py delete mode 100644 documentcloud/sidekick/local_tasks.py delete mode 100644 documentcloud/sidekick/migrations/0001_initial.py delete mode 100644 documentcloud/sidekick/migrations/0002_auto_20210723_2029.py delete mode 100644 documentcloud/sidekick/migrations/__init__.py delete mode 100644 documentcloud/sidekick/models.py delete mode 100644 documentcloud/sidekick/routers.py delete mode 100644 documentcloud/sidekick/rules.py delete mode 100644 documentcloud/sidekick/serializers.py delete mode 100644 documentcloud/sidekick/sidekick.py delete mode 100644 documentcloud/sidekick/signals.py delete mode 100644 documentcloud/sidekick/tasks.py delete mode 100644 documentcloud/sidekick/tests/__init__.py delete mode 100644 documentcloud/sidekick/tests/test_views.py delete mode 100644 documentcloud/sidekick/views.py diff --git a/compose/local/django/Dockerfile b/compose/local/django/Dockerfile index b82c1d06..3d1f9941 100644 --- a/compose/local/django/Dockerfile +++ b/compose/local/django/Dockerfile @@ -1,25 +1,19 @@ -# FROM python:3.6-alpine -FROM 
matthewfeickert/docker-python3-ubuntu:3.10.5 - +FROM python:3.12-slim ENV PYTHONUNBUFFERED 1 - USER root RUN apt-get -qq -y update && \ apt-get -qq -y install \ - # Pip dependencies - python3-pip \ + # Build dependencies + build-essential \ # Postgres dependencies libpq-dev postgresql-client python3-psycopg2 \ # Tesseract dependencies - libjpeg-turbo8 libtiff5 \ + libjpeg62-turbo libtiff6 \ # LibreOffice dependencies - libnss3-dev libcurl4-nss-dev libxslt1-dev libpixman-1-0 libxcb-render0-dev && \ - # Symlink bash and python - ln -sf bash /bin/sh && rm -f /usr/bin/python && \ - ln -s /usr/bin/python3 /usr/bin/python && \ - curl https://bootstrap.pypa.io/pip/3.6/get-pip.py -o get-pip.py && \ - python3 get-pip.py --force-reinstall + libnss3-dev libcurl4-openssl-dev libxslt1-dev libpixman-1-0 libxcb-render0-dev \ + # Utilities + curl git # set up makecert root CA RUN curl http://localhost/rootCA.pem > /usr/local/share/ca-certificates/rootCA.crt && update-ca-certificates @@ -55,4 +49,4 @@ WORKDIR /app ENV LD_LIBRARY_PATH /app/documentcloud/documents/processing/ocr/tesseract -ENTRYPOINT ["/entrypoint"] +ENTRYPOINT ["/entrypoint"] \ No newline at end of file diff --git a/compose/production/django/Dockerfile b/compose/production/django/Dockerfile index 055b12be..6cea765b 100644 --- a/compose/production/django/Dockerfile +++ b/compose/production/django/Dockerfile @@ -1,19 +1,16 @@ - -FROM python:3.6-alpine - +FROM python:3.12-slim ENV PYTHONUNBUFFERED 1 -RUN apk update \ +RUN apt-get -qq -y update && apt-get -qq -y install \ # psycopg2 dependencies - && apk add --virtual build-deps gcc python3-dev musl-dev \ - && apk add postgresql-dev \ + gcc python3-dev libpq-dev \ # Pillow dependencies - && apk add jpeg-dev zlib-dev freetype-dev lcms2-dev openjpeg-dev tiff-dev tk-dev tcl-dev \ + libjpeg62-turbo zlib1g-dev libfreetype6-dev liblcms2-dev libopenjp2-7-dev libtiff6 tk-dev tcl-dev \ # CFFI dependencies - && apk add libffi-dev py-cffi + libffi-dev python3-cffi -RUN addgroup -S 
django \ - && adduser -S -G django django +RUN groupadd -r django \ + && useradd -r -g django django # Requirements are installed here to ensure they will be cached. COPY ./requirements /requirements diff --git a/config/settings/base.py b/config/settings/base.py index 3106e21f..4b65621f 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -93,7 +93,6 @@ "documentcloud.addons.apps.AddOnsConfig", "documentcloud.projects.apps.ProjectsConfig", "documentcloud.statistics.apps.StatisticsConfig", - "documentcloud.sidekick.apps.SidekickConfig", "documentcloud.users.apps.UsersConfig", "documentcloud.entities.apps.EntitiesConfig", ] @@ -497,7 +496,7 @@ PROGRESS_URL = env("PROGRESS_URL", default="") IMPORT_URL = env("IMPORT_URL", default="") PROGRESS_TIMEOUT = env.int("PROGRESS_TIMEOUT", default=1) -SIDEKICK_PROCESSING_URL = env("SIDEKICK_PROCESSING_URL", default="") +#SIDEKICK_PROCESSING_URL = env("SIDEKICK_PROCESSING_URL", default="") # Auth LOGIN_URL = "/accounts/login/squarelet" diff --git a/config/settings/local.py b/config/settings/local.py index a9cfe46e..7538f9f5 100644 --- a/config/settings/local.py +++ b/config/settings/local.py @@ -53,7 +53,7 @@ "debug_toolbar.panels.profiling.ProfilingPanel", ], "SHOW_TEMPLATE_CONTEXT": True, - "SHOW_TOOLBAR_CALLBACK": lambda _: True, + "SHOW_TOOLBAR_CALLBACK": lambda _: False, } # Celery diff --git a/config/urls.py b/config/urls.py index 37ebacc1..78d935ca 100644 --- a/config/urls.py +++ b/config/urls.py @@ -49,8 +49,7 @@ ProjectMembershipViewSet, ProjectViewSet, ) -from documentcloud.sidekick.routers import SidekickRouter -from documentcloud.sidekick.views import SidekickViewSet + from documentcloud.statistics.views import StatisticsViewSet from documentcloud.users.views import MessageView, UserViewSet @@ -76,8 +75,8 @@ class BulkNestedDefaultRouter(BulkRouterMixin, NestedDefaultRouter): documents_router.register("notes", NoteViewSet) documents_router.register("sections", SectionViewSet) 
documents_router.register("entities", EntityOccurrenceViewSet) -documents_router.register("legacy_entities_2", LegacyEntity2ViewSet) -documents_router.register("legacy_entities", LegacyEntityViewSet) +documents_router.register("legacy_entities_2", LegacyEntity2ViewSet, basename="legacyentity2") +documents_router.register("legacy_entities", LegacyEntityViewSet, basename="legacyentity") documents_router.register("dates", EntityDateViewSet) documents_router.register("errors", DocumentErrorViewSet) documents_router.register("data", DataViewSet, basename="data") @@ -90,8 +89,7 @@ class BulkNestedDefaultRouter(BulkRouterMixin, NestedDefaultRouter): projects_router.register("documents", ProjectMembershipViewSet) projects_router.register("users", CollaborationViewSet) -sidekick_router = SidekickRouter(router, "projects", lookup="project") -sidekick_router.register("sidekick", SidekickViewSet) + router.register("documents/search/saved", SavedSearchViewSet, basename="saved_search") @@ -101,7 +99,6 @@ class BulkNestedDefaultRouter(BulkRouterMixin, NestedDefaultRouter): path("api/", include(router.urls)), path("api/", include(documents_router.urls)), path("api/", include(projects_router.urls)), - path("api/", include(sidekick_router.urls)), path("api/schema/", SpectacularAPIView.as_view(), name="schema"), path( "api/schema/redoc/", diff --git a/documentcloud/addons/choices.py b/documentcloud/addons/choices.py index 031b9e85..9130f337 100644 --- a/documentcloud/addons/choices.py +++ b/documentcloud/addons/choices.py @@ -1,14 +1,19 @@ # Django +from django.db import models from django.utils.translation import gettext_lazy as _ -# Third Party -from djchoices import ChoiceItem, DjangoChoices +class Event(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj -class Event(DjangoChoices): - # `api` specifies if this attribute should be 
accessible via the API - disabled = ChoiceItem(0, _("Disabled"), api=True) - hourly = ChoiceItem(1, _("Hourly"), api=True) - daily = ChoiceItem(2, _("Daily"), api=True) - weekly = ChoiceItem(3, _("Weekly"), api=True) - upload = ChoiceItem(4, _("Upload"), api=True) + disabled = 0, _("Disabled"), True + hourly = 1, _("Hourly"), True + daily = 2, _("Daily"), True + weekly = 3, _("Weekly"), True + upload = 4, _("Upload"), True \ No newline at end of file diff --git a/documentcloud/core/choices.py b/documentcloud/core/choices.py index 6acd9462..680b410a 100644 --- a/documentcloud/core/choices.py +++ b/documentcloud/core/choices.py @@ -1,122 +1,116 @@ # Django +from django.db import models from django.utils.translation import gettext_lazy as _ -# Third Party -from djchoices import ChoiceItem, DjangoChoices +class Language(models.TextChoices): + def __new__(cls, value, label=None, ocr_code=None): + obj = str.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.ocr_code = ocr_code if ocr_code is not None else value + return obj -class Language(DjangoChoices): - afrikaans = ChoiceItem("afr", _("Afrikaans"), ocr_code="afr") - amharic = ChoiceItem("amh", _("Amharic"), ocr_code="amh") - arabic = ChoiceItem("ara", _("Arabic"), ocr_code="ara") - assamese = ChoiceItem("asm", _("Assamese"), ocr_code="asm") - azerbaijani = ChoiceItem("aze", _("Azerbaijani"), ocr_code="aze") - azerbaijani_cyrillic = ChoiceItem( - "aze_cyrl", _("Azerbaijani - Cyrillic"), ocr_code="aze_cyrl" - ) - belarusian = ChoiceItem("bel", _("Belarusian"), ocr_code="bel") - bengali = ChoiceItem("ben", _("Bengali"), ocr_code="ben") - tibetan = ChoiceItem("bod", _("Tibetan"), ocr_code="bod") - bosnian = ChoiceItem("bos", _("Bosnian"), ocr_code="bos") - bulgarian = ChoiceItem("bul", _("Bulgarian"), ocr_code="bul") - catalan_valencian = ChoiceItem("cat", _("Catalan; Valencian"), ocr_code="cat") - cebuano = ChoiceItem("ceb", _("Cebuano"), ocr_code="ceb") - czech = 
ChoiceItem("ces", _("Czech"), ocr_code="ces") - chinese_simplified = ChoiceItem( - "zho", _("Chinese - Simplified"), ocr_code="chi_sim" - ) - chinese_traditional = ChoiceItem( - "tra", _("Chinese - Traditional"), ocr_code="chi_tra" - ) - cherokee = ChoiceItem("chr", _("Cherokee"), ocr_code="chr") - welsh = ChoiceItem("cym", _("Welsh"), ocr_code="cym") - danish = ChoiceItem("dan", _("Danish"), ocr_code="dan") - german = ChoiceItem("deu", _("German"), ocr_code="deu") - dzongkha = ChoiceItem("dzo", _("Dzongkha"), ocr_code="dzo") - greek = ChoiceItem("ell", _("Greek"), ocr_code="ell") - english = ChoiceItem("eng", _("English"), ocr_code="eng") - middle_english = ChoiceItem("enm", _("Middle English"), ocr_code="enm") - esperanto = ChoiceItem("epo", _("Esperanto"), ocr_code="epo") - estonian = ChoiceItem("est", _("Estonian"), ocr_code="est") - basque = ChoiceItem("eus", _("Basque"), ocr_code="eus") - persian = ChoiceItem("fas", _("Persian"), ocr_code="fas") - finnish = ChoiceItem("fin", _("Finnish"), ocr_code="fin") - french = ChoiceItem("fra", _("French"), ocr_code="fra") - german_fraktur = ChoiceItem("frk", _("German Fraktur"), ocr_code="frk") - middle_french = ChoiceItem("frm", _("Middle French"), ocr_code="frm") - irish = ChoiceItem("gle", _("Irish"), ocr_code="gle") - galician = ChoiceItem("glg", _("Galician"), ocr_code="glg") - ancient_greek = ChoiceItem("grc", _("Ancient Greek"), ocr_code="grc") - gujarati = ChoiceItem("guj", _("Gujarati"), ocr_code="guj") - haitian_haitian_creole = ChoiceItem( - "hat", _("Haitian; Haitian Creole"), ocr_code="hat" - ) - hebrew = ChoiceItem("heb", _("Hebrew"), ocr_code="heb") - hindi = ChoiceItem("hin", _("Hindi"), ocr_code="hin") - croatian = ChoiceItem("hrv", _("Croatian"), ocr_code="hrv") - hungarian = ChoiceItem("hun", _("Hungarian"), ocr_code="hun") - inuktitut = ChoiceItem("iku", _("Inuktitut"), ocr_code="iku") - indonesian = ChoiceItem("ind", _("Indonesian"), ocr_code="ind") - icelandic = ChoiceItem("isl", _("Icelandic"), 
ocr_code="isl") - italian = ChoiceItem("ita", _("Italian"), ocr_code="ita") - italian_old = ChoiceItem("ita_old", _("Italian - Old"), ocr_code="ita_old") - javanese = ChoiceItem("jav", _("Javanese"), ocr_code="jav") - japanese = ChoiceItem("jpn", _("Japanese"), ocr_code="jpn") - kannada = ChoiceItem("kan", _("Kannada"), ocr_code="kan") - georgian = ChoiceItem("kat", _("Georgian"), ocr_code="kat") - georgian_old = ChoiceItem("kat_old", _("Georgian - Old"), ocr_code="kat_old") - kazakh = ChoiceItem("kaz", _("Kazakh"), ocr_code="kaz") - central_khmer = ChoiceItem("khm", _("Central Khmer"), ocr_code="khm") - kirghiz_kyrgyz = ChoiceItem("kir", _("Kirghiz; Kyrgyz"), ocr_code="kir") - korean = ChoiceItem("kor", _("Korean"), ocr_code="kor") - kurdish = ChoiceItem("kur", _("Kurdish"), ocr_code="kur") - lao = ChoiceItem("lao", _("Lao"), ocr_code="lao") - latin = ChoiceItem("lat", _("Latin"), ocr_code="lat") - latvian = ChoiceItem("lav", _("Latvian"), ocr_code="lav") - lithuanian = ChoiceItem("lit", _("Lithuanian"), ocr_code="lit") - malayalam = ChoiceItem("mal", _("Malayalam"), ocr_code="mal") - marathi = ChoiceItem("mar", _("Marathi"), ocr_code="mar") - macedonian = ChoiceItem("mkd", _("Macedonian"), ocr_code="mkd") - maltese = ChoiceItem("mlt", _("Maltese"), ocr_code="mlt") - malay = ChoiceItem("msa", _("Malay"), ocr_code="msa") - burmese = ChoiceItem("mya", _("Burmese"), ocr_code="mya") - nepali = ChoiceItem("nep", _("Nepali"), ocr_code="nep") - dutch_flemish = ChoiceItem("nld", _("Dutch; Flemish"), ocr_code="nld") - norwegian = ChoiceItem("nor", _("Norwegian"), ocr_code="nor") - oriya = ChoiceItem("ori", _("Oriya"), ocr_code="ori") - panjabi_punjabi = ChoiceItem("pan", _("Panjabi; Punjabi"), ocr_code="pan") - polish = ChoiceItem("pol", _("Polish"), ocr_code="pol") - portuguese = ChoiceItem("por", _("Portuguese"), ocr_code="por") - pushto_pashto = ChoiceItem("pus", _("Pushto; Pashto"), ocr_code="pus") - romanian_moldavian_moldovan = ChoiceItem( - "ron", _("Romanian; 
Moldavian; Moldovan"), ocr_code="ron" - ) - russian = ChoiceItem("rus", _("Russian"), ocr_code="rus") - sanskrit = ChoiceItem("san", _("Sanskrit"), ocr_code="san") - sinhala_sinhalese = ChoiceItem("sin", _("Sinhala; Sinhalese"), ocr_code="sin") - slovak = ChoiceItem("slk", _("Slovak"), ocr_code="slk") - slovenian = ChoiceItem("slv", _("Slovenian"), ocr_code="slv") - spanish_castilian = ChoiceItem("spa", _("Spanish; Castilian"), ocr_code="spa") - spanish_castilian_old = ChoiceItem( - "spa_old", _("Spanish; Castilian - Old"), ocr_code="spa_old" - ) - albanian = ChoiceItem("sqi", _("Albanian"), ocr_code="sqi") - serbian = ChoiceItem("srp", _("Serbian"), ocr_code="srp") - serbian_latin = ChoiceItem("srp_latn", _("Serbian - Latin"), ocr_code="srp_latn") - swahili = ChoiceItem("swa", _("Swahili"), ocr_code="swa") - swedish = ChoiceItem("swe", _("Swedish"), ocr_code="swe") - syriac = ChoiceItem("syr", _("Syriac"), ocr_code="syr") - tamil = ChoiceItem("tam", _("Tamil"), ocr_code="tam") - telugu = ChoiceItem("tel", _("Telugu"), ocr_code="tel") - tajik = ChoiceItem("tgk", _("Tajik"), ocr_code="tgk") - tagalog = ChoiceItem("tgl", _("Tagalog"), ocr_code="tgl") - thai = ChoiceItem("tha", _("Thai"), ocr_code="tha") - tigrinya = ChoiceItem("tir", _("Tigrinya"), ocr_code="tir") - turkish = ChoiceItem("tur", _("Turkish"), ocr_code="tur") - uighur_uyghur = ChoiceItem("uig", _("Uighur; Uyghur"), ocr_code="uig") - ukrainian = ChoiceItem("ukr", _("Ukrainian"), ocr_code="ukr") - urdu = ChoiceItem("urd", _("Urdu"), ocr_code="urd") - uzbek = ChoiceItem("uzb", _("Uzbek"), ocr_code="uzb") - uzbek_cyrillic = ChoiceItem("uzb_cyrl", _("Uzbek - Cyrillic"), ocr_code="uzb_cyrl") - vietnamese = ChoiceItem("vie", _("Vietnamese"), ocr_code="vie") - yiddish = ChoiceItem("yid", _("Yiddish"), ocr_code="yid") + afrikaans = "afr", _("Afrikaans") + amharic = "amh", _("Amharic") + arabic = "ara", _("Arabic") + assamese = "asm", _("Assamese") + azerbaijani = "aze", _("Azerbaijani") + azerbaijani_cyrillic = 
"aze_cyrl", _("Azerbaijani - Cyrillic") + belarusian = "bel", _("Belarusian") + bengali = "ben", _("Bengali") + tibetan = "bod", _("Tibetan") + bosnian = "bos", _("Bosnian") + bulgarian = "bul", _("Bulgarian") + catalan_valencian = "cat", _("Catalan; Valencian") + cebuano = "ceb", _("Cebuano") + czech = "ces", _("Czech") + chinese_simplified = "zho", _("Chinese - Simplified"), "chi_sim" + chinese_traditional = "tra", _("Chinese - Traditional"), "chi_tra" + cherokee = "chr", _("Cherokee") + welsh = "cym", _("Welsh") + danish = "dan", _("Danish") + german = "deu", _("German") + dzongkha = "dzo", _("Dzongkha") + greek = "ell", _("Greek") + english = "eng", _("English") + middle_english = "enm", _("Middle English") + esperanto = "epo", _("Esperanto") + estonian = "est", _("Estonian") + basque = "eus", _("Basque") + persian = "fas", _("Persian") + finnish = "fin", _("Finnish") + french = "fra", _("French") + german_fraktur = "frk", _("German Fraktur") + middle_french = "frm", _("Middle French") + irish = "gle", _("Irish") + galician = "glg", _("Galician") + ancient_greek = "grc", _("Ancient Greek") + gujarati = "guj", _("Gujarati") + haitian_haitian_creole = "hat", _("Haitian; Haitian Creole") + hebrew = "heb", _("Hebrew") + hindi = "hin", _("Hindi") + croatian = "hrv", _("Croatian") + hungarian = "hun", _("Hungarian") + inuktitut = "iku", _("Inuktitut") + indonesian = "ind", _("Indonesian") + icelandic = "isl", _("Icelandic") + italian = "ita", _("Italian") + italian_old = "ita_old", _("Italian - Old") + javanese = "jav", _("Javanese") + japanese = "jpn", _("Japanese") + kannada = "kan", _("Kannada") + georgian = "kat", _("Georgian") + georgian_old = "kat_old", _("Georgian - Old") + kazakh = "kaz", _("Kazakh") + central_khmer = "khm", _("Central Khmer") + kirghiz_kyrgyz = "kir", _("Kirghiz; Kyrgyz") + korean = "kor", _("Korean") + kurdish = "kur", _("Kurdish") + lao = "lao", _("Lao") + latin = "lat", _("Latin") + latvian = "lav", _("Latvian") + lithuanian = "lit", 
_("Lithuanian") + malayalam = "mal", _("Malayalam") + marathi = "mar", _("Marathi") + macedonian = "mkd", _("Macedonian") + maltese = "mlt", _("Maltese") + malay = "msa", _("Malay") + burmese = "mya", _("Burmese") + nepali = "nep", _("Nepali") + dutch_flemish = "nld", _("Dutch; Flemish") + norwegian = "nor", _("Norwegian") + oriya = "ori", _("Oriya") + panjabi_punjabi = "pan", _("Panjabi; Punjabi") + polish = "pol", _("Polish") + portuguese = "por", _("Portuguese") + pushto_pashto = "pus", _("Pushto; Pashto") + romanian_moldavian_moldovan = "ron", _("Romanian; Moldavian; Moldovan") + russian = "rus", _("Russian") + sanskrit = "san", _("Sanskrit") + sinhala_sinhalese = "sin", _("Sinhala; Sinhalese") + slovak = "slk", _("Slovak") + slovenian = "slv", _("Slovenian") + spanish_castilian = "spa", _("Spanish; Castilian") + spanish_castilian_old = "spa_old", _("Spanish; Castilian - Old") + albanian = "sqi", _("Albanian") + serbian = "srp", _("Serbian") + serbian_latin = "srp_latn", _("Serbian - Latin") + swahili = "swa", _("Swahili") + swedish = "swe", _("Swedish") + syriac = "syr", _("Syriac") + tamil = "tam", _("Tamil") + telugu = "tel", _("Telugu") + tajik = "tgk", _("Tajik") + tagalog = "tgl", _("Tagalog") + thai = "tha", _("Thai") + tigrinya = "tir", _("Tigrinya") + turkish = "tur", _("Turkish") + uighur_uyghur = "uig", _("Uighur; Uyghur") + ukrainian = "ukr", _("Ukrainian") + urdu = "urd", _("Urdu") + uzbek = "uzb", _("Uzbek") + uzbek_cyrillic = "uzb_cyrl", _("Uzbek - Cyrillic") + vietnamese = "vie", _("Vietnamese") + yiddish = "yid", _("Yiddish") \ No newline at end of file diff --git a/documentcloud/core/filters.py b/documentcloud/core/filters.py index 05b971d1..a2e68485 100644 --- a/documentcloud/core/filters.py +++ b/documentcloud/core/filters.py @@ -71,12 +71,12 @@ class ModelMultipleChoiceFilter( class ChoicesFilter(django_filters.TypedMultipleChoiceFilter): """A choices filter configured to work how we want our choice filters to work - `choices` kwarg should 
be an instanceof DjangoChoices + `choices` kwarg should be an instance of IntegerChoices/TextChoices """ def __init__(self, *args, **kwargs): choices = kwargs.pop("choices") - kwargs["choices"] = list(choices.labels.items()) - kwargs["coerce"] = lambda x: getattr(choices, x) + kwargs["choices"] = [(member.name, member.label) for member in choices] + kwargs["coerce"] = lambda x: next(m for m in choices if m.name == x) kwargs["widget"] = QueryArrayWidget super().__init__(*args, **kwargs) diff --git a/documentcloud/documents/choices.py b/documentcloud/documents/choices.py index 91bcf23d..a2931f00 100644 --- a/documentcloud/documents/choices.py +++ b/documentcloud/documents/choices.py @@ -1,55 +1,82 @@ # Django +from django.db import models from django.utils.translation import gettext_lazy as _ -# Third Party -from djchoices import ChoiceItem, DjangoChoices +class Access(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj - -class Access(DjangoChoices): - # `api` specifies if this attribute should be accessible via the API # Free and public to all. - public = ChoiceItem(0, _("Public"), api=True) + public = 0, _("Public"), True # Visible to both the owner and her organization. - organization = ChoiceItem(1, _("Organization"), api=True) + organization = 1, _("Organization"), True # The document is only visible to its owner. - private = ChoiceItem(2, _("Private"), api=True) + private = 2, _("Private"), True # The document has been taken down (perhaps temporary). 
- invisible = ChoiceItem(3, _("Invisible"), api=False) + invisible = 3, _("Invisible"), False + +class Status(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj -class Status(DjangoChoices): - # `api` specifies if this attribute should be accessible via the API - # The document is in a normal succesful state - success = ChoiceItem(0, _("Success"), api=True) + # The document is in a normal successful state + success = 0, _("Success"), True # The document is processing, but readable during the operation - readable = ChoiceItem(1, _("Readable"), api=True) + readable = 1, _("Readable"), True # The document is processing, and not currently readable - pending = ChoiceItem(2, _("Pending"), api=True) + pending = 2, _("Pending"), True # There was an error processing the document - error = ChoiceItem(3, _("Error"), api=True) + error = 3, _("Error"), True # There is no file yet - nofile = ChoiceItem(4, _("No file"), api=True) + nofile = 4, _("No file"), True # The file is deleted - deleted = ChoiceItem(5, _("Deleted"), api=False) - - -class EntityKind(DjangoChoices): - unknown = ChoiceItem(0, _("Unknown"), api=True) - person = ChoiceItem(1, _("Person"), api=True) - location = ChoiceItem(2, _("Location"), api=True) - organization = ChoiceItem(3, _("Organization"), api=True) - event = ChoiceItem(4, _("Event"), api=True) - work_of_art = ChoiceItem(5, _("Work_Of_Art"), api=True) - consumer_good = ChoiceItem(6, _("Consumer_Good"), api=True) - other = ChoiceItem(7, _("Other"), api=True) - phone_number = ChoiceItem(9, _("Phone_Number"), api=True) - address = ChoiceItem(10, _("Address"), api=True) - date = ChoiceItem(11, _("Date"), api=True) - number = ChoiceItem(12, _("Number"), api=True) - price = ChoiceItem(13, _("Price"), api=True) - - -class OccurrenceKind(DjangoChoices): - unknown = ChoiceItem(0, _("Unknown"), api=True) - proper 
= ChoiceItem(1, _("Proper"), api=True) - common = ChoiceItem(2, _("Common"), api=True) + deleted = 5, _("Deleted"), False + + +class EntityKind(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj + + unknown = 0, _("Unknown"), True + person = 1, _("Person"), True + location = 2, _("Location"), True + organization = 3, _("Organization"), True + event = 4, _("Event"), True + work_of_art = 5, _("Work_Of_Art"), True + consumer_good = 6, _("Consumer_Good"), True + other = 7, _("Other"), True + phone_number = 9, _("Phone_Number"), True + address = 10, _("Address"), True + date = 11, _("Date"), True + number = 12, _("Number"), True + price = 13, _("Price"), True + + +class OccurrenceKind(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj + + unknown = 0, _("Unknown"), True + proper = 1, _("Proper"), True + common = 2, _("Common"), True \ No newline at end of file diff --git a/documentcloud/documents/fields.py b/documentcloud/documents/fields.py index da3145ae..125b0426 100644 --- a/documentcloud/documents/fields.py +++ b/documentcloud/documents/fields.py @@ -4,12 +4,11 @@ class ChoiceField(serializers.ChoiceField): """Choice field enhanced to use the choices label and ability to omit choices""" - def __init__(self, choices, **kwargs): choices = [ - (choice.value, label) - for label, choice in choices._fields.items() - if choice.api + (member.value, member.name) + for member in choices + if member.api ] self.choice_map = {label: value for value, label in choices} super().__init__(choices, **kwargs) @@ -23,8 +22,7 @@ def to_representation(self, value): def to_internal_value(self, data): if data == "" and self.allow_blank: return "" - try: return self.choice_map[str(data)] 
except KeyError: - self.fail("invalid_choice", input=data) + self.fail("invalid_choice", input=data) \ No newline at end of file diff --git a/documentcloud/documents/models/document.py b/documentcloud/documents/models/document.py index a61eccd4..441a6ae7 100644 --- a/documentcloud/documents/models/document.py +++ b/documentcloud/documents/models/document.py @@ -666,8 +666,8 @@ def page_filter(text): "type": "document", "user": self.user_id, "organization": self.organization_id, - "access": Access.attributes[self.access], - "status": Status.attributes[self.status], + "access": Access(self.access).name, + "status": Status(self.status).name, "title": self.title, "slug": self.slug, "source": self.source, diff --git a/documentcloud/documents/processing/sidekick/__init__.py b/documentcloud/documents/processing/sidekick/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/documentcloud/documents/processing/sidekick/common b/documentcloud/documents/processing/sidekick/common deleted file mode 120000 index f74dff0e..00000000 --- a/documentcloud/documents/processing/sidekick/common +++ /dev/null @@ -1 +0,0 @@ -../../../common \ No newline at end of file diff --git a/documentcloud/documents/processing/sidekick/main.py b/documentcloud/documents/processing/sidekick/main.py deleted file mode 100644 index 133561ae..00000000 --- a/documentcloud/documents/processing/sidekick/main.py +++ /dev/null @@ -1,184 +0,0 @@ -# Standard Library -import logging -import os -import re -from collections import Counter -from urllib.parse import urljoin - -# Third Party -import environ -import numpy as np -import requests -import sklearn.decomposition -from sklearn.feature_extraction.text import TfidfVectorizer - -env = environ.Env() -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -# pylint: disable=import-error - -# Imports based on execution context -if env.str("ENVIRONMENT").startswith("local"): - # DocumentCloud - from documentcloud.common import path - from 
documentcloud.common.environment import get_pubsub_data, publisher, storage - from documentcloud.common.serverless import utils - from documentcloud.common.serverless.error_handling import pubsub_function -else: - # Third Party - # only initialize sentry on serverless - import sentry_sdk - from common import path - from common.environment import get_pubsub_data, publisher, storage - from common.serverless import utils - from common.serverless.error_handling import pubsub_function - from sentry_sdk.integrations.aws_lambda import AwsLambdaIntegration - from sentry_sdk.integrations.redis import RedisIntegration - - sentry_sdk.init( - dsn=env("SENTRY_DSN"), integrations=[AwsLambdaIntegration(), RedisIntegration()] - ) - -REDIS = utils.get_redis() -API_CALLBACK = env.str("API_CALLBACK") -PROCESSING_TOKEN = env.str("PROCESSING_TOKEN") -VOCAB_SIZE = env.int("VOCAB_SIZE", default=30000) -TOKEN_PATTERN = re.compile(r"(?u)\b\w\w+\b") -EMBEDDING_DIR = env.str("EMBEDDING_DIR", default="embedding") - -SIDEKICK_PREPROCESS_TOPIC = publisher.topic_path( - "documentcloud", - env.str("SIDEKICK_PREPROCESS_TOPIC", default="sidekick-preprocess-topic"), -) - -LANGUAGES = {"eng": "en"} - - -def send_sidekick_update(project_id, json): - """Send an update to the API server for sidekick""" - utils.request(REDIS, "patch", f"projects/{project_id}/sidekick/", json) - - -def load_documents(project_id): - """Load the documents - - Fetch their IDs, slugs and languages via the API - Use the ID and slug to fetch the text from S3 - Return the most common language among the documents as the language to use - for the word embeddings - """ - - logger.info( - "[SIDEKICK PREPROCESS] project_id: %s - fetching project documents", project_id - ) - file_names = [] - languages = Counter() - doc_ids = [] - next_ = urljoin(API_CALLBACK, f"projects/{project_id}/documents/?expand=document") - - while next_: - response = requests.get( - next_, headers={"Authorization": f"processing-token {PROCESSING_TOKEN}"} - ) - 
response.raise_for_status() - response_json = response.json() - next_ = response_json["next"] - for result in response_json["results"]: - file_names.append( - path.text_path(result["document"]["id"], result["document"]["slug"]) - ) - languages.update([result["document"]["language"]]) - doc_ids.append(result["document"]["id"]) - - language = languages.most_common()[0][0] - - # download the files in parallel - texts = storage.async_download(file_names) - - return texts, doc_ids, language - - -def process_text(project_id, texts): - """Calculate the vocabulary for the corpus based on the document texts""" - - logger.info("[SIDEKICK PREPROCESS] project_id: %s - calculating vocab", project_id) - - logger.info("[SIDEKICK PREPROCESS] project_id: %s - tfidf", project_id) - - # Derive tf-idf data on corpus - vectorizer = TfidfVectorizer( - strip_accents="unicode", stop_words=None, max_features=VOCAB_SIZE - ) - - tfidf = vectorizer.fit_transform(texts) - features = vectorizer.get_feature_names() - - logger.info("[SIDEKICK PREPROCESS] project_id: %s - svd", project_id) - - # Project tf-idf data down in dimensionality - svd_transformer = sklearn.decomposition.TruncatedSVD( - 300, algorithm="randomized", n_iter=5 - ) - doc_svd = svd_transformer.fit_transform(tfidf) - - return tfidf, features, doc_svd - - -def doc_embedding(project_id, language, tfidf, features, doc_svd): - """Calculate the doc embeddings""" - # Third Party - import fasttext - - logger.info("[SIDEKICK PREPROCESS] project_id: %s - doc embeddings", project_id) - - # Load the embedding model - # error if language not present - language = LANGUAGES.get(language, language) - model = fasttext.load_model(os.path.join(EMBEDDING_DIR, f"cc.{language}.300.bin")) - embedding_vectors = np.array( - [model.get_word_vector(feature) for feature in features] - ) - - # scale embedding vectors based on frequency of the words - doc_embeddings = np.dot(tfidf.A, embedding_vectors) - - # Doc vectors are just doc_svd and doc_embeddings 
concatenated - doc_vectors = np.hstack((doc_svd, doc_embeddings)) - - # Serialize doc vectors to file - with storage.open( - path.sidekick_document_vectors_path(project_id), "wb" - ) as vectors_file: - np.savez_compressed(vectors_file, doc_vectors) - - -def doc_embedding_(project_id, _language, _tfidf, _features, doc_svd, doc_ids): - """Simpler doc embeddings - skip word vectors and just use the doc svd""" - - logger.info("[SIDEKICK PREPROCESS] project_id: %s - doc embeddings", project_id) - - # Serialize doc vectors to file - with storage.open( - path.sidekick_document_vectors_path(project_id), "wb" - ) as vectors_file: - np.savez_compressed(vectors_file, vectors=doc_svd, ids=doc_ids) - - -@pubsub_function(REDIS, SIDEKICK_PREPROCESS_TOPIC) -def preprocess(data, _context=None): - """Preprocess the documents in a project for sidekick""" - - data = get_pubsub_data(data) - project_id = data["project_id"] - - logger.info("[SIDEKICK PREPROCESS] project_id: %s", project_id) - - try: - texts, doc_ids, language = load_documents(project_id) - tfidf, features, doc_svd = process_text(project_id, texts) - doc_embedding_(project_id, language, tfidf, features, doc_svd, doc_ids) - except Exception: # pylint: disable=broad-except - send_sidekick_update(project_id, {"status": "error"}) - else: - send_sidekick_update(project_id, {"status": "success"}) diff --git a/documentcloud/documents/processing/sidekick/requirements.txt b/documentcloud/documents/processing/sidekick/requirements.txt deleted file mode 100644 index 68379dfc..00000000 --- a/documentcloud/documents/processing/sidekick/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ --r cloud-requirements.txt - -aioboto3==6.5.0 -django-environ==0.4.5 -furl==2.1.0 -pebble==4.5.0 -redis==3.4.1 -requests==2.22.0 -sentry-sdk==0.14.0 diff --git a/documentcloud/documents/views.py b/documentcloud/documents/views.py index dd8c9254..bc016f32 100644 --- a/documentcloud/documents/views.py +++ b/documentcloud/documents/views.py @@ -116,7 +116,7 @@ 
class DocumentViewSet(BulkModelMixin, FlexFieldsModelViewSet): serializer_class = DocumentSerializer queryset = Document.objects.none() permission_classes = ( - DjangoObjectPermissionsOrAnonReadOnly | DocumentTokenPermissions, + IsAuthenticated| DocumentTokenPermissions, ) @extend_schema( diff --git a/documentcloud/entities/choices.py b/documentcloud/entities/choices.py index bbbf16ed..e878bcc7 100644 --- a/documentcloud/entities/choices.py +++ b/documentcloud/entities/choices.py @@ -1,13 +1,18 @@ # Django +from django.db import models from django.utils.translation import gettext_lazy as _ -# Third Party -from djchoices import ChoiceItem, DjangoChoices +class EntityAccess(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj -class EntityAccess(DjangoChoices): - # `api` specifies if this attribute should be accessible via the API # Free and public to all. - public = ChoiceItem(0, _("Public"), api=True) + public = 0, _("Public"), True # Visible to both the owner and her organization. 
- private = ChoiceItem(2, _("Private"), api=True) + private = 2, _("Private"), True \ No newline at end of file diff --git a/documentcloud/organizations/migrations/0020_remove_organization_members_and_more.py b/documentcloud/organizations/migrations/0020_remove_organization_members_and_more.py new file mode 100644 index 00000000..be1679e7 --- /dev/null +++ b/documentcloud/organizations/migrations/0020_remove_organization_members_and_more.py @@ -0,0 +1,25 @@ +# Generated by Django 4.2.2 on 2026-01-21 20:56 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('organizations', '0019_organization_members_organization_parent_and_more'), + ] + + operations = [ + migrations.RemoveField( + model_name='organization', + name='members', + ), + migrations.RemoveField( + model_name='organization', + name='parent', + ), + migrations.RemoveField( + model_name='organization', + name='share_resources', + ), + ] diff --git a/documentcloud/organizations/migrations/0021_organization_members_organization_parent_and_more.py b/documentcloud/organizations/migrations/0021_organization_members_organization_parent_and_more.py new file mode 100644 index 00000000..e26b4cfd --- /dev/null +++ b/documentcloud/organizations/migrations/0021_organization_members_organization_parent_and_more.py @@ -0,0 +1,47 @@ +# Generated by Django 4.2.2 on 2026-01-21 21:29 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("organizations", "0020_remove_organization_members_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="organization", + name="members", + field=models.ManyToManyField( + blank=True, + help_text="Organizations which are members of this organization (useful for trade associations or other member groups)", + related_name="groups", + to=settings.SQUARELET_ORGANIZATION_MODEL, + ), + ), + 
migrations.AddField( + model_name="organization", + name="parent", + field=models.ForeignKey( + blank=True, + help_text="The parent organization", + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="children", + to=settings.SQUARELET_ORGANIZATION_MODEL, + verbose_name="parent", + ), + ), + migrations.AddField( + model_name="organization", + name="share_resources", + field=models.BooleanField( + default=True, + help_text="Share resources (subscriptions, credits) with all children and member organizations. Global toggle that applies to all relationships.", + verbose_name="share resources", + ), + ), + ] diff --git a/documentcloud/projects/choices.py b/documentcloud/projects/choices.py index 992f11b2..9cedb8a2 100644 --- a/documentcloud/projects/choices.py +++ b/documentcloud/projects/choices.py @@ -1,15 +1,20 @@ # Django +from django.db import models from django.utils.translation import gettext_lazy as _ -# Third Party -from djchoices import ChoiceItem, DjangoChoices +class CollaboratorAccess(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj -class CollaboratorAccess(DjangoChoices): - # `api` specifies if this attribute should be accessible via the API # This collaborator has read access - view = ChoiceItem(0, _("View"), api=True) + view = 0, _("View"), True # This collaborator can edit the documents in the project - edit = ChoiceItem(1, _("Edit"), api=True) - # This collaborator can edit the documents and the project itself - admin = ChoiceItem(2, _("Admin"), api=True) + edit = 1, _("Edit"), True + # This collaborator can edit the documents and the project itself + admin = 2, _("Admin"), True \ No newline at end of file diff --git a/documentcloud/sidekick/__init__.py b/documentcloud/sidekick/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git 
a/documentcloud/sidekick/apps.py b/documentcloud/sidekick/apps.py deleted file mode 100644 index 1a707e73..00000000 --- a/documentcloud/sidekick/apps.py +++ /dev/null @@ -1,12 +0,0 @@ -# Django -from django.apps import AppConfig - - -class SidekickConfig(AppConfig): - name = "documentcloud.sidekick" - - def ready(self): - # pylint: disable=unused-import - # load signals - # DocumentCloud - import documentcloud.sidekick.signals diff --git a/documentcloud/sidekick/choices.py b/documentcloud/sidekick/choices.py deleted file mode 100644 index 80e6c0b3..00000000 --- a/documentcloud/sidekick/choices.py +++ /dev/null @@ -1,11 +0,0 @@ -# Django -from django.utils.translation import gettext_lazy as _ - -# Third Party -from djchoices import ChoiceItem, DjangoChoices - - -class Status(DjangoChoices): - success = ChoiceItem(0, _("Success"), api=True) - pending = ChoiceItem(1, _("Pending"), api=True) - error = ChoiceItem(2, _("Error"), api=True) diff --git a/documentcloud/sidekick/lego.py b/documentcloud/sidekick/lego.py deleted file mode 100644 index 7dc52604..00000000 --- a/documentcloud/sidekick/lego.py +++ /dev/null @@ -1,139 +0,0 @@ -"""Implements metric lego learning""" - -# Adapted from https://bitbucket.org/muckdoc/muckdoc/ -# Look into faster version, e.g. 
from -# https://github.com/fcaldas/MetricLearning/blob/master/lego_functions.py - -# Third Party -import numba -import numpy as np -from scipy import stats - -# Use mathy names in this file -# pylint: disable=invalid-name - - -def lego_learn(doc_vectors, constraints, positive_docs): - updated_doc_vectors, mean_vec = get_mean_vec( - doc_vectors, constraints, positive_docs - ) - doc_dists = fast_cosine_matrix(mean_vec, updated_doc_vectors) - doc_percentiles = stats.rankdata(doc_dists, "average") / len(doc_dists) - return doc_dists, doc_percentiles - - -@numba.njit -def update(X_i, X_j, y, A, u=7, l=10, gamma=0.08): - # pylint: disable=too-many-arguments - diff = X_i - X_j - d = np.dot(diff, np.dot(A, diff)) - if (d > u and y == 1) or (d < l and y == -1): - target = u * (y == 1) + l * (y == -1) - _y = ( - (gamma * d * target - 1) - + np.sqrt((gamma * d * target - 1) ** 2 + 4 * gamma * d * d) - ) / (2 * gamma * d) - return A - ( - (gamma * (_y - target)) / (1 + gamma * (_y - target) * d) - ) * np.outer(np.dot(A, diff), np.dot(A, diff)) - else: - return A - - -@numba.njit(parallel=True) -def fast_cosine_matrix(u, M): - # From https://stackoverflow.com/a/47316253 - scores = np.zeros(M.shape[0]) - for i in numba.prange(M.shape[0]): # pylint: disable=not-an-iterable - v = M[i] - m = u.shape[0] - udotv = 0 - u_norm = 0 - v_norm = 0 - for j in range(m): - if (np.isnan(u[j])) or (np.isnan(v[j])): - continue - - udotv += u[j] * v[j] - u_norm += u[j] * u[j] - v_norm += v[j] * v[j] - - u_norm = np.sqrt(u_norm) - v_norm = np.sqrt(v_norm) - - if (u_norm == 0) or (v_norm == 0): - ratio = 1.0 - else: - ratio = udotv / (u_norm * v_norm) - scores[i] = ratio - return scores - - -@numba.njit -def get_mean_vec_(A_updated, doc_vectors, positive_doc_vectors): - L = np.linalg.cholesky(A_updated) - # mean with axis is not supported in numba, so accomplish with sum - mean_vec = np.sum(np.dot(positive_doc_vectors, L), 0) / L.shape[0] - - # Mean vector ordered list - updated_doc_vectors = 
np.dot(doc_vectors, L) - return updated_doc_vectors, mean_vec - - -def get_mean_vec(doc_vectors, constraints, positive_docs): - - if len(constraints) == 0: - # No constraints, go purely off positive docs - positive_doc_vectors = doc_vectors[positive_docs] - mean_vec = np.mean(positive_doc_vectors, axis=0) - return doc_vectors, mean_vec - else: - A_updated = batch_update(doc_vectors, constraints) - return get_mean_vec_(A_updated, doc_vectors, doc_vectors[positive_docs]) - - -def lego(u, v, y, r=0.5, A_prev=None): - - m = len(u) # number of features - # make into colume vectors [m,1] - u = u[:, np.newaxis] - v = v[:, np.newaxis] - if A_prev is None: - A_prev = np.identity(m) - - # find the current distance (mahalanobis) between u and v - z = u - v - y_current = float(np.dot(z.T, np.dot(A_prev, z))) # y_hat in paper - - # find y_bar, which is an approximation of distance using the new metric - y_bar_up = ( - r * y * y_current - - 1 - + np.sqrt((r * y * y_current - 1) ** 2 + 4 * r * y_current**2) - ) - y_bar_down = 2 * r * y_current - y_bar = y_bar_up / y_bar_down - y_bar = float(np.nan_to_num(y_bar)) - - # calculate the new metric matrix A_new using y_bar - A_new_up = r * (y_bar - y) * np.dot(A_prev, np.dot(np.dot(z, z.T), A_prev)) - A_new_down = 1 + r * (y_bar - y) * y_current - A_new = A_prev - A_new_up / A_new_down - - return A_new - - -# iterates through the constraints and updates the A matrix -def batch_update(doc_vectors, constraints): - A_ = np.identity(doc_vectors.shape[1]) - - for doc_u, doc_v, same_class in constraints: - u_t = doc_vectors[doc_u] - v_t = doc_vectors[doc_v] - if same_class == 1: - y_t = 1 - else: - y_t = -1 - A_ = update(u_t, v_t, y_t, A_) - - return A_ diff --git a/documentcloud/sidekick/local_tasks.py b/documentcloud/sidekick/local_tasks.py deleted file mode 100644 index 3659d6cc..00000000 --- a/documentcloud/sidekick/local_tasks.py +++ /dev/null @@ -1,10 +0,0 @@ -# Django -from celery import shared_task - -# DocumentCloud -from 
documentcloud.documents.processing.sidekick.main import preprocess - - -@shared_task -def sidekick_preprocess(data): - preprocess(data) diff --git a/documentcloud/sidekick/migrations/0001_initial.py b/documentcloud/sidekick/migrations/0001_initial.py deleted file mode 100644 index 8abdd5f8..00000000 --- a/documentcloud/sidekick/migrations/0001_initial.py +++ /dev/null @@ -1,25 +0,0 @@ -# Generated by Django 2.2.5 on 2021-07-13 20:22 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - initial = True - - dependencies = [ - ('projects', '0012_auto_20210407_1801'), - ] - - operations = [ - migrations.CreateModel( - name='Sidekick', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('status', models.IntegerField(choices=[(0, 'Uninitialized'), (1, 'Processing'), (2, 'Initialized'), (3, 'Error')], default=0, help_text='The status of this sidekick', verbose_name='status')), - ('tag_name', models.CharField(help_text='The name of the tag to use to associate a document as a positive or negative match for this sidekick', max_length=50, verbose_name='tag name')), - ('project', models.OneToOneField(help_text='The project this sidekick is for', on_delete=django.db.models.deletion.CASCADE, related_name='sidekick', to='projects.Project', verbose_name='project')), - ], - ), - ] diff --git a/documentcloud/sidekick/migrations/0002_auto_20210723_2029.py b/documentcloud/sidekick/migrations/0002_auto_20210723_2029.py deleted file mode 100644 index 8b53850b..00000000 --- a/documentcloud/sidekick/migrations/0002_auto_20210723_2029.py +++ /dev/null @@ -1,22 +0,0 @@ -# Generated by Django 2.2.5 on 2021-07-23 20:29 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('sidekick', '0001_initial'), - ] - - operations = [ - migrations.RemoveField( - model_name='sidekick', - name='tag_name', - ), - 
migrations.AlterField( - model_name='sidekick', - name='status', - field=models.IntegerField(choices=[(0, 'Success'), (1, 'Pending'), (2, 'Error')], default=1, help_text='The status of this sidekick', verbose_name='status'), - ), - ] diff --git a/documentcloud/sidekick/migrations/__init__.py b/documentcloud/sidekick/migrations/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/documentcloud/sidekick/models.py b/documentcloud/sidekick/models.py deleted file mode 100644 index 8822ba06..00000000 --- a/documentcloud/sidekick/models.py +++ /dev/null @@ -1,45 +0,0 @@ -# Django -from django.db import models -from django.utils.translation import gettext_lazy as _ - -# Third Party -import numpy as np - -# DocumentCloud -from documentcloud.common import path -from documentcloud.common.environment import storage -from documentcloud.sidekick.choices import Status - -VOCAB_SIZE = 30_000 - - -def file_path(instance, file_name): - return f"sidekick/{instance.pk}/{file_name}" - - -class Sidekick(models.Model): - """Online learning for documents in a project""" - - project = models.OneToOneField( - verbose_name=_("project"), - to="projects.Project", - on_delete=models.CASCADE, - related_name="sidekick", - help_text=_("The project this sidekick is for"), - ) - status = models.IntegerField( - _("status"), - choices=Status.choices, - default=Status.pending, - help_text=_("The status of this sidekick"), - ) - - def get_document_vectors(self): - """Fetch the pre-preocessed document vectors from storage""" - with storage.open( - path.sidekick_document_vectors_path(self.project_id), "rb" - ) as vectors_file: - doc_vector_obj = np.load(vectors_file) - - # Grab document vector matrix - return (doc_vector_obj.get("vectors"), doc_vector_obj.get("ids")) diff --git a/documentcloud/sidekick/routers.py b/documentcloud/sidekick/routers.py deleted file mode 100644 index 77a1c6dc..00000000 --- a/documentcloud/sidekick/routers.py +++ /dev/null @@ -1,34 +0,0 @@ -# Django -from 
rest_framework.routers import DynamicRoute, Route - -# Third Party -from rest_framework_nested.routers import NestedDefaultRouter - - -class SidekickRouter(NestedDefaultRouter): - """Route list URL to detail views""" - - routes = [ - # List route. - Route( - url=r"^{prefix}{trailing_slash}$", - mapping={ - "get": "retrieve", - "put": "update", - "patch": "partial_update", - "post": "create", - "delete": "destroy", - }, - name="{basename}-detail", - detail=True, - initkwargs={"suffix": "Instance"}, - ), - # Dynamically generated list routes. Generated using - # @action(detail=False) decorator on methods of the viewset. - DynamicRoute( - url=r"^{prefix}/{url_path}{trailing_slash}$", - name="{basename}-{url_name}", - detail=True, - initkwargs={}, - ), - ] diff --git a/documentcloud/sidekick/rules.py b/documentcloud/sidekick/rules.py deleted file mode 100644 index 29ef7e83..00000000 --- a/documentcloud/sidekick/rules.py +++ /dev/null @@ -1,24 +0,0 @@ -# Third Party -from rules import add_perm, is_authenticated, predicate - -# DocumentCloud -from documentcloud.core.rules import skip_if_not_obj -from documentcloud.projects import rules as projects_rules - - -@predicate -@skip_if_not_obj -def can_view(user, sidekick): - return projects_rules.can_view(user, sidekick.project) - - -@predicate -@skip_if_not_obj -def can_change(user, sidekick): - return projects_rules.can_change(user, sidekick.project) - - -add_perm("sidekick.view_sidekick", can_view) -add_perm("sidekick.add_sidekick", is_authenticated) -add_perm("sidekick.change_sidekick", is_authenticated & can_change) -add_perm("sidekick.delete_sidekick", is_authenticated & can_change) diff --git a/documentcloud/sidekick/serializers.py b/documentcloud/sidekick/serializers.py deleted file mode 100644 index b26f4ef1..00000000 --- a/documentcloud/sidekick/serializers.py +++ /dev/null @@ -1,29 +0,0 @@ -# Django -from rest_framework import serializers - -# DocumentCloud -from documentcloud.documents.fields import ChoiceField 
-from documentcloud.sidekick.choices import Status -from documentcloud.sidekick.models import Sidekick - - -class SidekickSerializer(serializers.ModelSerializer): - status = ChoiceField( - Status, read_only=True, help_text=Sidekick._meta.get_field("status").help_text - ) - - class Meta: - model = Sidekick - fields = ["status"] - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - # Allow writing to status from processing lambda - context = kwargs.get("context", {}) - request = context.get("request") - has_request_auth = ( - request and hasattr(request, "auth") and request.auth is not None - ) - if has_request_auth and "processing" in request.auth.get("permissions", []): - self.fields["status"].read_only = False diff --git a/documentcloud/sidekick/sidekick.py b/documentcloud/sidekick/sidekick.py deleted file mode 100644 index e69de29b..00000000 diff --git a/documentcloud/sidekick/signals.py b/documentcloud/sidekick/signals.py deleted file mode 100644 index 2fc2f554..00000000 --- a/documentcloud/sidekick/signals.py +++ /dev/null @@ -1,18 +0,0 @@ -# Django -from django.db.models.signals import post_delete -from django.dispatch import receiver - -# DocumentCloud -from documentcloud.common import path -from documentcloud.common.environment import storage -from documentcloud.sidekick.models import Sidekick - - -@receiver( - post_delete, - sender=Sidekick, - dispatch_uid="documentcloud.core.signals.delete_vectors", -) -def delete_vectors(instance, **kwargs): - """Delete vector files when deleting a sidekick instance""" - storage.delete(path.sidekick_document_vectors_path(instance.project_id)) diff --git a/documentcloud/sidekick/tasks.py b/documentcloud/sidekick/tasks.py deleted file mode 100644 index b0fdea92..00000000 --- a/documentcloud/sidekick/tasks.py +++ /dev/null @@ -1,164 +0,0 @@ -# Django -from celery import shared_task -from django.conf import settings -from django.db import transaction - -# Standard Library -import logging -import sys 
-from itertools import combinations - -# Third Party -from requests.exceptions import RequestException - -# DocumentCloud -from documentcloud.common.environment import httpsub -from documentcloud.documents.models import Document -from documentcloud.documents.tasks import solr_index_batch -from documentcloud.sidekick import lego -from documentcloud.sidekick.choices import Status -from documentcloud.sidekick.models import Sidekick - -logger = logging.getLogger(__name__) - -if settings.ENVIRONMENT.startswith("local"): - # pylint: disable=unused-import - # DocumentCloud - from documentcloud.sidekick.local_tasks import sidekick_preprocess - - -def _httpsub_submit(url, project_pk, json, task_): - """Helper to reliably submit a task to lambda via HTTP""" - logger.info( - "Submitting project %s for %s. Retry: %d", - project_pk, - task_.name, - task_.request.retries, - ) - try: - response = httpsub.post(url, json=json) - response.raise_for_status() - logger.info("Submitted project %s for %s succesfully.", project_pk, task_.name) - except RequestException as exc: - if task_.request.retries >= task_.max_retries: - Sidekick.objects.filter(project_id=project_pk).update(status=Status.error) - logger.error( - "Submitting project %s for %s failed: %s", - project_pk, - task_.name, - exc, - exc_info=sys.exc_info(), - ) - else: - raise - - -@shared_task( - autoretry_for=(RequestException,), - retry_backoff=30, - retry_kwargs={"max_retries": settings.HTTPSUB_RETRY_LIMIT}, -) -def preprocess(project_pk): - """Start the sidekick pre-processing""" - _httpsub_submit( - settings.SIDEKICK_PROCESSING_URL, - project_pk, - {"project_id": project_pk}, - preprocess, - ) - - -@shared_task -def lego_learn(sidekick_id, tag_name): - """Start the lego learning""" - - logger.info("[LEGO LEARN] %s %s", sidekick_id, tag_name) - - with transaction.atomic(): - try: - sidekick = Sidekick.objects.get(pk=sidekick_id) - except Sidekick.DoesNotExist: - logger.warning("Sidekick does not exist: %s", sidekick_id) 
- return - - if sidekick.status != Status.success: - logger.warning( - "Sidekick not in successful state: %s %s", sidekick_id, sidekick.status - ) - return - - sidekick.status = Status.pending - sidekick.save() - - try: - doc_vectors, doc_ids = sidekick.get_document_vectors() - except ValueError: - sidekick.status = Status.error - sidekick.save() - return - - doc_ids = list(doc_ids) - - positive_doc_indices = _load_doc_indices(sidekick, doc_ids, tag_name, "true") - negative_doc_indices = _load_doc_indices(sidekick, doc_ids, tag_name, "false") - - logger.info( - "[LEGO LEARN] positive: %d negative: %d", - len(positive_doc_indices), - len(negative_doc_indices), - ) - - logger.info("[LEGO LEARN] positive: %s", positive_doc_indices) - - # constraints - # list of triples of the form (id0, id1, constraint) - # where constraint is 1 if both id0 and id1 are positive docs - # (positively correlated) - # and constraint is 0 is one is positive and one is negative (negatively correlated) - constraints = [] - for doc0, doc1 in combinations(positive_doc_indices, 2): - constraints.append((doc0, doc1, 1)) - for doc0 in positive_doc_indices: - for doc1 in negative_doc_indices: - constraints.append((doc0, doc1, 0)) - - logger.info("[LEGO LEARN] constraints: %s", constraints) - - # percentiles - # list of percentiles corresponding to document index - dists, percentiles = lego.lego_learn(doc_vectors, constraints, positive_doc_indices) - - logger.info("[LEGO LEARN] dists: %s", dists) - logger.info("[LEGO LEARN] percentiles: %s", percentiles) - - documents = Document.objects.in_bulk(doc_ids) - for doc_id, dist in zip(doc_ids, dists): - documents[doc_id].data[f"{tag_name}_score"] = [str(dist)] - documents[doc_id].data[f"{tag_name}_likely"] = ( - ["Likely"] - if dist > 0.75 - else (["Unlikely"] if dist < -0.75 else ["Uncertain"]) - ) - documents[doc_id].solr_dirty = True - with transaction.atomic(): - Document.objects.bulk_update(documents.values(), ["data", "solr_dirty"]) - 
transaction.on_commit( - lambda: solr_index_batch.delay( - [int(i) for i in doc_ids], - field_updates={ - f"data_{tag_name}_score": "set", - f"data_{tag_name}_likely": "set", - }, - ) - ) - - sidekick.status = Status.success - sidekick.save() - - -def _load_doc_indices(sidekick, doc_ids, tag_name, tag_value): - """Load the document indices for the given tag name and value""" - docs = sidekick.project.documents.filter( - data__contains={tag_name: [tag_value]}, pk__in=doc_ids - ).values_list("pk", flat=True) - return [doc_ids.index(d) for d in docs] diff --git a/documentcloud/sidekick/tests/__init__.py b/documentcloud/sidekick/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/documentcloud/sidekick/tests/test_views.py b/documentcloud/sidekick/tests/test_views.py deleted file mode 100644 index b86f6c8b..00000000 --- a/documentcloud/sidekick/tests/test_views.py +++ /dev/null @@ -1,114 +0,0 @@ -# Django -from django.conf import settings -from rest_framework import status - -# Standard Library -import json - -# Third Party -import pytest - -# DocumentCloud -from documentcloud.sidekick.choices import Status -from documentcloud.sidekick.models import Sidekick - - -@pytest.mark.django_db() -class TestSidekickAPI: - def test_create(self, client, project): - """Create a new sidekick""" - client.force_authenticate(user=project.user) - response = client.post(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_201_CREATED - response_json = json.loads(response.content) - assert response_json == {"status": "pending"} - - def test_create_no_perm(self, client, project, user): - """Create a new sidekick for a project you are not an editor for""" - client.force_authenticate(user=user) - response = client.post(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_404_NOT_FOUND - - def test_create_reprocess(self, client, project): - """Use `create` to reprocess an existing sidekick""" - 
Sidekick.objects.create(project=project, status=Status.success) - client.force_authenticate(user=project.user) - response = client.post(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_201_CREATED - response_json = json.loads(response.content) - assert response_json == {} - - def test_create_reprocess_pending(self, client, project): - """It is an error to attempt to re-process while already processing""" - Sidekick.objects.create(project=project, status=Status.pending) - client.force_authenticate(user=project.user) - response = client.post(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_400_BAD_REQUEST - response_json = json.loads(response.content) - assert response_json == ["Already processing"] - - def test_retrieve(self, client, project): - """Retrieve a sidekick""" - client.force_authenticate(user=project.user) - Sidekick.objects.create(project=project) - response = client.get(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_200_OK - - def test_retrieve_no_exist(self, client, project): - """Retrieve a sidekick but no sidekick""" - client.force_authenticate(user=project.user) - response = client.get(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_404_NOT_FOUND - - def test_retrieve_no_perm(self, client, project, user): - """Retrieve a sidekick but no permissions""" - client.force_authenticate(user=user) - response = client.get(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_404_NOT_FOUND - - def test_delete(self, client, project, mocker): - """Delete a sidekick""" - mock_delete = mocker.patch("documentcloud.common.environment.storage.delete") - Sidekick.objects.create(project=project, status=Status.success) - client.force_authenticate(user=project.user) - response = client.delete(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == 
status.HTTP_204_NO_CONTENT - mock_delete.assert_called_once() - - def test_delete_no_perm(self, client, project, user): - """Delete a sidekick without permissions""" - Sidekick.objects.create(project=project, status=Status.success) - client.force_authenticate(user=user) - response = client.delete(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_404_NOT_FOUND - - def test_delete_no_exist(self, client, project): - """Delete a sidekick with no sidekick""" - client.force_authenticate(user=project.user) - response = client.delete(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_404_NOT_FOUND - - def test_update(self, client, project): - """Update a sidekick - nothing to update for normal users""" - Sidekick.objects.create(project=project, status=Status.success) - client.force_authenticate(user=project.user) - response = client.patch(f"/api/projects/{project.pk}/sidekick/", {}) - assert response.status_code == status.HTTP_200_OK - - def test_update_processing_token(self, client, project): - """Update a sidekick with a processing token""" - Sidekick.objects.create(project=project, status=Status.pending) - response = client.patch( - f"/api/projects/{project.pk}/sidekick/", - {"status": "success"}, - HTTP_AUTHORIZATION=f"processing-token {settings.PROCESSING_TOKEN}", - ) - assert response.status_code == status.HTTP_200_OK - response_json = json.loads(response.content) - assert response_json == {"status": "success"} - - def test_update_no_exist(self, client, project): - """Update a sidekick that doesn't exist""" - client.force_authenticate(user=project.user) - response = client.patch(f"/api/projects/{project.pk}/sidekick/", {}) - assert response.status_code == status.HTTP_404_NOT_FOUND diff --git a/documentcloud/sidekick/views.py b/documentcloud/sidekick/views.py deleted file mode 100644 index ca27767f..00000000 --- a/documentcloud/sidekick/views.py +++ /dev/null @@ -1,82 +0,0 @@ -# Django -from 
django.db import transaction -from django.db.utils import IntegrityError -from django.http.response import Http404 -from rest_framework import serializers, status, viewsets -from rest_framework.decorators import action -from rest_framework.generics import get_object_or_404 -from rest_framework.response import Response - -# DocumentCloud -from documentcloud.core.permissions import ( - DjangoObjectPermissionsOrAnonReadOnly, - SidekickPermissions, -) -from documentcloud.projects.models import Project -from documentcloud.sidekick.choices import Status -from documentcloud.sidekick.models import Sidekick -from documentcloud.sidekick.serializers import SidekickSerializer -from documentcloud.sidekick.tasks import lego_learn, preprocess - - -class SidekickViewSet(viewsets.ModelViewSet): - serializer_class = SidekickSerializer - queryset = Sidekick.objects.none() - permission_classes = (DjangoObjectPermissionsOrAnonReadOnly | SidekickPermissions,) - - def get_object(self): - """There is always at most one sidekick associated with a project""" - valid_token = ( - hasattr(self.request, "auth") - and self.request.auth is not None - and "processing" in self.request.auth.get("permissions", []) - ) - # Processing scope can access all documents - if valid_token: - projects = Project.objects.all() - else: - projects = Project.objects.get_editable(self.request.user) - project = get_object_or_404(projects, pk=self.kwargs["project_pk"]) - - try: - return project.sidekick - except Sidekick.DoesNotExist: - raise Http404 - - def perform_create(self, serializer): - """Specify the project""" - project = get_object_or_404( - Project.objects.get_editable(self.request.user), - pk=self.kwargs["project_pk"], - ) - try: - # try saving and processing the sidekick if one does not exist - with transaction.atomic(): - sidekick = serializer.save(project=project) - preprocess.delay(self.kwargs["project_pk"]) - except IntegrityError: - # a sidekick already exists, select it for updating - with 
transaction.atomic(): - sidekick = Sidekick.objects.select_for_update().get( - project_id=self.kwargs["project_pk"] - ) - if sidekick.status == Status.pending: - # if it is already processing then error - raise serializers.ValidationError("Already processing") - - # set to processing and begin the processing - sidekick.status = Status.pending - sidekick.save() - preprocess.delay(self.kwargs["project_pk"]) - - @action(detail=True, methods=["post"]) - def learn(self, request, project_pk=None): - """Activate lego learning""" - # pylint: disable=unused-argument - sidekick = self.get_object() - if "tagname" not in request.data: - raise serializers.ValidationError("Missing tagname") - - lego_learn.delay(sidekick.pk, request.data["tagname"]) - - return Response("OK", status=status.HTTP_200_OK) diff --git a/requirements/base.in b/requirements/base.in index f4c90fda..7c5d884c 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,6 +1,6 @@ pytz python-slugify -Pillow +Pillow==12.1.1 rcssmin argon2-cffi redis @@ -18,45 +18,45 @@ daily-active-users # Django # ------------------------------------------------------------------------------ -django<5 +django==5.2.12 django-environ django-model-utils -django-compressor +django-compressor>=4.5 django-redis -django-choices -django-filter +django-filter>=23.0 django-extensions django-autoslug django-premailer rules squarelet-auth dogslow -django-debug-toolbar +django-debug-toolbar==6.2.0 django-cprofile-middleware django-robots django-ipware django-parler # Django REST Framework -djangorestframework -rest-social-auth -social-auth-core[openidconnect] +djangorestframework==3.16.1 +rest-social-auth==9.0.0 +social-auth-core django-cors-headers -djangorestframework_simplejwt +djangorestframework_simplejwt==5.5.1 drf-nested-routers -cryptography # support RS256 for JWT +cryptography==46.0.0 # support RS256 for JWT drf-flex-fields django-parler-rest # Cloud providers -gcsfs +gcsfs>=2023.6.0 google-cloud-pubsub boto3 smart-open 
aioboto3 # to resolve version issues -wrapt==1.11.2 +wrapt>=1.14.0,<2 + # Processing cpuprofile @@ -67,23 +67,22 @@ numpy typing-extensions # Solr --e git+https://github.com/MuckRock/pysolr.git@958db93824d6ba58c3183cee43f8cfd8c26165ba#egg=pysolr +pysolr luqum # entity extraction -google-cloud-language +google-cloud-language>=2.11.0 # PDF pdfplumber pikepdf pymupdf -# sidekick -numba -scipy - - +# Logging logzio-python-handler # Documentation -drf-spectacular \ No newline at end of file +drf-spectacular + +# Python 2 compatibility some packages still require +six==1.17.0 diff --git a/requirements/base.txt b/requirements/base.txt index 8ae79beb..5bf4a6d9 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,31 +1,33 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile requirements/base.in # --e git+https://github.com/MuckRock/pysolr.git@958db93824d6ba58c3183cee43f8cfd8c26165ba#egg=pysolr - # via -r requirements/base.in aioboto3==9.6.0 # via -r requirements/base.in aiobotocore[boto3]==2.3.0 # via aioboto3 -aiohttp==3.8.1 - # via aiobotocore +aiohappyeyeballs==2.6.1 + # via aiohttp +aiohttp==3.10.5 + # via + # aiobotocore + # gcsfs aioitertools==0.10.0 # via aiobotocore aiosignal==1.2.0 # via aiohttp amqp==5.3.1 # via kombu -argon2-cffi==19.1.0 +argon2-cffi==25.1.0 # via -r requirements/base.in -asgiref==3.7.2 +argon2-cffi-bindings==25.1.0 + # via argon2-cffi +asgiref==3.11.1 # via django asttokens==2.0.5 # via stack-data -async-timeout==4.0.2 - # via aiohttp attrs==19.3.0 # via # aiohttp @@ -36,36 +38,29 @@ billiard==4.2.1 # via celery bleach==6.0.0 # via -r requirements/base.in -boto==2.49.0 - # via smart-open boto3==1.21.21 # via # -r requirements/base.in # aiobotocore - # smart-open botocore==1.24.21 # via # aiobotocore # boto3 # s3transfer -cachetools==3.1.1 - # via google-auth celery==5.4.0 # via # -r requirements/base.in # 
squarelet-auth certifi==2019.6.16 # via requests -cffi==1.15.1 +cffi==2.0.0 # via - # argon2-cffi + # argon2-cffi-bindings # cryptography chardet==3.0.4 # via pdfminer-six charset-normalizer==2.1.0 - # via - # aiohttp - # requests + # via requests click==8.1.7 # via # celery @@ -80,9 +75,10 @@ click-repl==0.3.0 # via celery cpuprofile==1.0.1 # via -r requirements/base.in -cryptography==35.0.0 +cryptography==46.0.0 # via # -r requirements/base.in + # google-auth # social-auth-core cssselect==1.1.0 # via premailer @@ -94,18 +90,18 @@ decorator==4.4.0 # via # gcsfs # ipython -defusedxml==0.6.0 +defusedxml==0.7.1 # via # python3-openid # social-auth-core -deprecation==2.1.0 +deprecated==1.3.1 # via pikepdf -django==4.2.2 +django==5.2.12 # via # -r requirements/base.in # daily-active-users # django-appconf - # django-choices + # django-compressor # django-cors-headers # django-debug-toolbar # django-extensions @@ -120,26 +116,25 @@ django==4.2.2 # drf-nested-routers # drf-spectacular # rest-social-auth + # social-auth-app-django # squarelet-auth django-appconf==1.0.3 # via django-compressor django-autoslug==1.9.8 # via -r requirements/base.in -django-choices==1.7.2 - # via -r requirements/base.in -django-compressor==4.4 +django-compressor==4.6.0 # via -r requirements/base.in django-cors-headers==3.13.0 # via -r requirements/base.in django-cprofile-middleware==1.0.5 # via -r requirements/base.in -django-debug-toolbar==4.1.0 +django-debug-toolbar==6.2.0 # via -r requirements/base.in django-environ==0.4.5 # via -r requirements/base.in django-extensions==3.2.3 # via -r requirements/base.in -django-filter==21.1 +django-filter==25.2 # via -r requirements/base.in django-ipware==4.0.2 # via -r requirements/base.in @@ -157,9 +152,9 @@ django-redis==5.0.0 # via # -r requirements/base.in # python-redis-lock -django-robots==5.0 +django-robots==6.1 # via -r requirements/base.in -djangorestframework==3.14.0 +djangorestframework==3.16.1 # via # -r requirements/base.in # 
django-parler-rest @@ -167,7 +162,7 @@ djangorestframework==3.14.0 # drf-nested-routers # drf-spectacular # rest-social-auth -djangorestframework-simplejwt==4.8.0 +djangorestframework-simplejwt==5.5.1 # via -r requirements/base.in dogslow==1.2 # via -r requirements/base.in @@ -177,46 +172,67 @@ drf-nested-routers==0.91 # via -r requirements/base.in drf-spectacular==0.28.0 # via -r requirements/base.in -ecdsa==0.13.2 - # via python-jose executing==0.8.3 # via stack-data -frozenlist==1.3.0 +frozenlist==1.8.0 # via # aiohttp # aiosignal -fsspec==0.5.2 +fsspec==2025.10.0 # via gcsfs furl==2.1.0 # via -r requirements/base.in -gcsfs==0.3.1 +gcsfs==2025.10.0 # via -r requirements/base.in -google-api-core[grpc]==1.23.0 +google-api-core[grpc]==2.17.1 # via + # google-cloud-core # google-cloud-language # google-cloud-pubsub -google-auth==1.23.0 + # google-cloud-storage +google-auth==2.49.1 # via # gcsfs # google-api-core # google-auth-oauthlib -google-auth-oauthlib==0.4.1 + # google-cloud-core + # google-cloud-language + # google-cloud-storage +google-auth-oauthlib==1.3.1 # via gcsfs -google-cloud-language==2.0.0 +google-cloud-core==2.5.1 + # via google-cloud-storage +google-cloud-language==2.19.0 # via -r requirements/base.in -google-cloud-pubsub==1.0.2 +google-cloud-pubsub==2.10.0 # via -r requirements/base.in -googleapis-common-protos[grpc]==1.6.0 +google-cloud-storage==3.4.1 + # via gcsfs +google-crc32c==1.8.0 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.8.2 + # via google-cloud-storage +googleapis-common-protos[grpc]==1.73.0 # via # google-api-core # grpc-google-iam-v1 -grpc-google-iam-v1==0.12.3 + # grpcio-status +grpc-google-iam-v1==0.12.7 # via google-cloud-pubsub -grpcio==1.47.0 +grpcio==1.80.0 # via # google-api-core + # google-cloud-language + # google-cloud-pubsub # googleapis-common-protos # grpc-google-iam-v1 + # grpcio-status +grpcio-status==1.63.0rc1 + # via + # google-api-core + # google-cloud-pubsub 
html2text==2020.1.16 # via -r requirements/base.in idna==2.8 @@ -239,17 +255,13 @@ jsonschema==3.2.0 # drf-spectacular kombu==5.4.2 # via celery -libcst==0.4.1 - # via google-cloud-language listcrunch==1.0.0 # via -r requirements/base.in -llvmlite==0.38.1 - # via numba logzio-python-handler==4.1.1 # via -r requirements/base.in luqum==0.8.1 # via -r requirements/base.in -lxml==4.9.1 +lxml==6.0.2 # via # pikepdf # premailer @@ -261,15 +273,8 @@ multidict==4.7.5 # via # aiohttp # yarl -mypy-extensions==0.4.3 - # via typing-inspect -numba==0.55.2 +numpy==2.2.6 # via -r requirements/base.in -numpy==1.22.4 - # via - # -r requirements/base.in - # numba - # scipy oauthlib==3.1.0 # via # requests-oauthlib @@ -277,9 +282,7 @@ oauthlib==3.1.0 orderedmultidict==1.0.1 # via furl packaging==24.1 - # via - # deprecation - # pikepdf + # via pikepdf parso==0.8.3 # via jedi pdfminer-six==20200517 @@ -292,9 +295,9 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython -pikepdf==5.4.0 +pikepdf==10.5.1 # via -r requirements/base.in -pillow==9.2.0 +pillow==12.1.1 # via # -r requirements/base.in # pdfplumber @@ -307,12 +310,19 @@ prompt-toolkit==3.0.38 # via # click-repl # ipython -proto-plus==1.13.0 - # via google-cloud-language +propcache==0.4.1 + # via yarl +proto-plus==1.27.1 + # via + # google-cloud-language + # google-cloud-pubsub protobuf==4.25.2 # via # google-api-core + # google-cloud-language # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status # logzio-python-handler # proto-plus ptyprocess==0.6.0 @@ -320,19 +330,16 @@ ptyprocess==0.6.0 pure-eval==0.2.2 # via stack-data pyasn1==0.4.7 - # via - # pyasn1-modules - # python-jose - # rsa + # via pyasn1-modules pyasn1-modules==0.2.7 # via google-auth pycparser==2.19 # via cffi pycryptodome==3.10.1 # via pdfminer-six -pygments==2.5.2 +pygments==2.20.0 # via ipython -pyjwt==2.3.0 +pyjwt==2.12.1 # via # djangorestframework-simplejwt # social-auth-core @@ -340,12 +347,12 @@ pymupdf==1.25.3 # via -r 
requirements/base.in pyrsistent==0.18.0 # via jsonschema -python-dateutil==2.8.2 +pysolr==3.11.0 + # via -r requirements/base.in +python-dateutil==2.9.0.post0 # via # botocore # celery -python-jose==3.3.0 - # via social-auth-core python-redis-lock[django]==3.3.1 # via -r requirements/base.in python-slugify==3.0.3 @@ -353,15 +360,10 @@ python-slugify==3.0.3 python3-openid==3.1.0 # via social-auth-core pytz==2022.1 - # via - # -r requirements/base.in - # djangorestframework - # google-api-core + # via -r requirements/base.in pyyaml==5.3.1 - # via - # drf-spectacular - # libcst -rcssmin==1.1.1 + # via drf-spectacular +rcssmin==1.2.2 # via # -r requirements/base.in # django-compressor @@ -370,54 +372,44 @@ redis==3.4.1 # -r requirements/base.in # django-redis # python-redis-lock -requests==2.28.1 +requests==2.32.5 # via # gcsfs # google-api-core + # google-cloud-storage # logzio-python-handler # premailer # pysolr # requests-oauthlib - # smart-open # social-auth-core # squarelet-auth requests-oauthlib==1.2.0 # via # google-auth-oauthlib # social-auth-core -rest-social-auth==8.1.0 +rest-social-auth==9.0.0 # via -r requirements/base.in -rjsmin==1.2.1 +rjsmin==1.2.5 # via django-compressor -rsa==4.0 - # via - # google-auth - # python-jose rules==2.1 # via -r requirements/base.in s3transfer==0.5.2 # via boto3 -scipy==1.8.1 - # via -r requirements/base.in -six==1.15.0 +six==1.17.0 # via - # argon2-cffi + # -r requirements/base.in # asttokens # bleach # django-appconf - # django-choices # furl - # google-api-core - # google-auth - # grpcio # jsonschema # orderedmultidict # python-dateutil -smart-open==1.8.4 +smart-open==7.5.1 # via -r requirements/base.in -social-auth-app-django==5.0.0 +social-auth-app-django==5.6.0 # via rest-social-auth -social-auth-core[openidconnect]==4.4.2 +social-auth-core[openidconnect]==4.7.0 # via # -r requirements/base.in # rest-social-auth @@ -442,11 +434,7 @@ traitlets==5.3.0 typing-extensions==4.15.0 # via # -r requirements/base.in - # asgiref - 
# libcst - # typing-inspect -typing-inspect==0.6.0 - # via libcst + # grpcio tzdata==2024.2 # via # celery @@ -470,11 +458,13 @@ wcwidth==0.1.8 # via prompt-toolkit webencodings==0.5.1 # via bleach -wrapt==1.11.2 +wrapt==1.17.3 # via # -r requirements/base.in # aiobotocore -yarl==1.4.2 + # deprecated + # smart-open +yarl==1.23.0 # via aiohttp # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements/local.in b/requirements/local.in index 843be959..b74f2fef 100644 --- a/requirements/local.in +++ b/requirements/local.in @@ -2,9 +2,10 @@ Werkzeug ipdb -Sphinx +Sphinx>8 psycopg2 --no-binary psycopg2 pip-tools +pip-audit # Testing # ------------------------------------------------------------------------------ @@ -34,10 +35,7 @@ fakeredis # ------------------------------------------------------------------------------ python-Levenshtein Unidecode -opencv-python requests-mock -fasttext==0.9.3 -scikit-learn # Documentation # ------------------------------------------------------------------------------ diff --git a/requirements/local.txt b/requirements/local.txt index 660893b7..c6818566 100644 --- a/requirements/local.txt +++ b/requirements/local.txt @@ -1,44 +1,51 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile requirements/local.in # --no-binary psycopg2 --e git+https://github.com/MuckRock/pysolr.git@958db93824d6ba58c3183cee43f8cfd8c26165ba#egg=pysolr - # via -r requirements/./base.txt -e git+https://github.com/elritsch/python-sharedmock#egg=sharedmock # via -r requirements/local.in aioboto3==9.6.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt aiobotocore[boto3]==2.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aioboto3 -aiohttp==3.8.1 +aiohappyeyeballs==2.6.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # aiohttp +aiohttp==3.10.5 + # 
via + # -r requirements/base.txt # aiobotocore + # gcsfs aioitertools==0.10.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore aiosignal==1.2.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp -alabaster==0.7.12 +alabaster==1.0.0 # via sphinx amqp==5.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # kombu -argon2-cffi==19.1.0 - # via -r requirements/./base.txt -asgiref==3.7.2 +argon2-cffi==25.1.0 + # via -r requirements/base.txt +argon2-cffi-bindings==25.1.0 + # via + # -r requirements/base.txt + # argon2-cffi +asgiref==3.11.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django astroid==2.11.7 # via @@ -46,78 +53,70 @@ astroid==2.11.7 # pylint-celery asttokens==2.0.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # stack-data -async-timeout==4.0.2 - # via - # -r requirements/./base.txt - # aiohttp attrs==19.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp # jsonschema # pytest -babel==2.7.0 +babel==2.18.0 # via sphinx backcall==0.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython billiard==4.2.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery black==24.8.0 # via -r requirements/local.in bleach==6.0.0 - # via -r requirements/./base.txt -boto==2.49.0 - # via - # -r requirements/./base.txt - # smart-open + # via -r requirements/base.txt +boolean-py==5.0 + # via license-expression boto3==1.21.21 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore - # smart-open botocore==1.24.21 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore # boto3 # s3transfer build==1.2.2.post1 # via pip-tools -cachetools==3.1.1 +cachecontrol[filecache]==0.14.4 # via - # -r requirements/./base.txt - # google-auth + # cachecontrol + # pip-audit celery==5.4.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # 
squarelet-auth certifi==2019.6.16 # via - # -r requirements/./base.txt + # -r requirements/base.txt # requests -cffi==1.15.1 +cffi==2.0.0 # via - # -r requirements/./base.txt - # argon2-cffi + # -r requirements/base.txt + # argon2-cffi-bindings # cryptography chardet==3.0.4 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfminer-six charset-normalizer==2.1.0 # via - # -r requirements/./base.txt - # aiohttp + # -r requirements/base.txt # requests click==8.1.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # black # celery # click-didyoumean @@ -126,15 +125,15 @@ click==8.1.7 # pip-tools click-didyoumean==0.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery click-plugins==1.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery click-repl==0.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery colorama==0.4.6 # via pytest-watch @@ -143,44 +142,48 @@ coverage==4.5.4 # -r requirements/local.in # django-coverage-plugin cpuprofile==1.0.1 - # via -r requirements/./base.txt -cryptography==35.0.0 + # via -r requirements/base.txt +cryptography==46.0.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # google-auth # social-auth-core cssselect==1.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # premailer cssutils==1.0.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # premailer +cyclonedx-python-lib==7.6.2 + # via pip-audit daily-active-users==0.1.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt decorator==4.4.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs # ipdb # ipython -defusedxml==0.6.0 +defusedxml==0.7.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # py-serializable # python3-openid # social-auth-core -deprecation==2.1.0 +deprecated==1.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pikepdf 
dill==0.3.5.1 # via pylint -django==4.2.2 +django==5.2.12 # via - # -r requirements/./base.txt + # -r requirements/base.txt # daily-active-users # django-appconf - # django-choices + # django-compressor # django-cors-headers # django-debug-toolbar # django-extensions @@ -195,82 +198,77 @@ django==4.2.2 # drf-nested-routers # drf-spectacular # rest-social-auth + # social-auth-app-django # squarelet-auth django-appconf==1.0.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-compressor django-autoslug==1.9.8 - # via -r requirements/./base.txt -django-choices==1.7.2 - # via -r requirements/./base.txt -django-compressor==4.4 - # via -r requirements/./base.txt + # via -r requirements/base.txt +django-compressor==4.6.0 + # via -r requirements/base.txt django-cors-headers==3.13.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-coverage-plugin==2.0.3 # via -r requirements/local.in django-cprofile-middleware==1.0.5 - # via -r requirements/./base.txt -django-debug-toolbar==4.1.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt +django-debug-toolbar==6.2.0 + # via -r requirements/base.txt django-environ==0.4.5 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-extensions==3.2.3 - # via -r requirements/./base.txt -django-filter==21.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt +django-filter==25.2 + # via -r requirements/base.txt django-ipware==4.0.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-model-utils==3.2.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-parler==2.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-parler-rest django-parler-rest==2.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-premailer==0.2.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-redis==5.0.0 # via - # -r requirements/./base.txt + # -r 
requirements/base.txt # python-redis-lock -django-robots==5.0 - # via -r requirements/./base.txt -djangorestframework==3.14.0 +django-robots==6.1 + # via -r requirements/base.txt +djangorestframework==3.16.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-parler-rest # djangorestframework-simplejwt # drf-nested-routers # drf-spectacular # rest-social-auth -djangorestframework-simplejwt==4.8.0 - # via -r requirements/./base.txt +djangorestframework-simplejwt==5.5.1 + # via -r requirements/base.txt docopt==0.6.2 # via pytest-watch -docutils==0.15.2 +docutils==0.21.2 # via sphinx dogslow==1.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt drf-flex-fields==1.0.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt drf-nested-routers==0.91 - # via -r requirements/./base.txt + # via -r requirements/base.txt drf-spectacular==0.28.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # -r requirements/local.in -ecdsa==0.13.2 - # via - # -r requirements/./base.txt - # python-jose entrypoints==0.3 # via flake8 executing==0.8.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # stack-data factory-boy==2.12.0 # via -r requirements/local.in @@ -278,71 +276,104 @@ faker==2.0.1 # via factory-boy fakeredis==1.0.5 # via -r requirements/local.in -fasttext==0.9.3 - # via -r requirements/local.in +filelock==3.25.2 + # via cachecontrol flake8==3.7.8 # via -r requirements/local.in -frozenlist==1.3.0 +frozenlist==1.8.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp # aiosignal -fsspec==0.5.2 +fsspec==2025.10.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs furl==2.1.0 - # via -r requirements/./base.txt -gcsfs==0.3.1 - # via -r requirements/./base.txt -google-api-core[grpc]==1.23.0 + # via -r requirements/base.txt +gcsfs==2025.10.0 + # via -r requirements/base.txt +google-api-core[grpc]==2.17.1 # via - # -r requirements/./base.txt + # -r 
requirements/base.txt + # google-cloud-core # google-cloud-language # google-cloud-pubsub -google-auth==1.23.0 + # google-cloud-storage +google-auth==2.49.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs # google-api-core # google-auth-oauthlib -google-auth-oauthlib==0.4.1 + # google-cloud-core + # google-cloud-language + # google-cloud-storage +google-auth-oauthlib==1.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs -google-cloud-language==2.0.0 - # via -r requirements/./base.txt -google-cloud-pubsub==1.0.2 - # via -r requirements/./base.txt -googleapis-common-protos[grpc]==1.6.0 +google-cloud-core==2.5.1 + # via + # -r requirements/base.txt + # google-cloud-storage +google-cloud-language==2.19.0 + # via -r requirements/base.txt +google-cloud-pubsub==2.10.0 + # via -r requirements/base.txt +google-cloud-storage==3.4.1 + # via + # -r requirements/base.txt + # gcsfs +google-crc32c==1.8.0 + # via + # -r requirements/base.txt + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.8.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # google-cloud-storage +googleapis-common-protos[grpc]==1.73.0 + # via + # -r requirements/base.txt # google-api-core # grpc-google-iam-v1 -grpc-google-iam-v1==0.12.3 + # grpcio-status +grpc-google-iam-v1==0.12.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-cloud-pubsub -grpcio==1.47.0 +grpcio==1.80.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-api-core + # google-cloud-language + # google-cloud-pubsub # googleapis-common-protos # grpc-google-iam-v1 + # grpcio-status +grpcio-status==1.63.0rc1 + # via + # -r requirements/base.txt + # google-api-core + # google-cloud-pubsub html2text==2020.1.16 - # via -r requirements/./base.txt + # via -r requirements/base.txt +html5lib==1.1 + # via pip-audit idna==2.8 # via - # -r requirements/./base.txt + # -r requirements/base.txt # requests # yarl 
-imagesize==1.1.0 +imagesize==2.0.0 # via sphinx importlib-metadata==6.0.0 # via pluggy inflection==0.5.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular iniconfig==1.1.1 # via pytest @@ -350,135 +381,131 @@ ipdb==0.13.9 # via -r requirements/local.in ipython==8.5.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipdb isort==4.3.21 # via pylint jedi==0.18.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython -jinja2==2.10.1 +jinja2==3.1.6 # via sphinx jmespath==0.9.4 # via - # -r requirements/./base.txt + # -r requirements/base.txt # boto3 # botocore -joblib==1.0.1 - # via scikit-learn jsonschema==3.2.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular kombu==5.4.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery lazy-object-proxy==1.4.2 # via astroid -libcst==0.4.1 - # via - # -r requirements/./base.txt - # google-cloud-language +levenshtein==0.27.3 + # via python-levenshtein +license-expression==30.4.4 + # via cyclonedx-python-lib listcrunch==1.0.0 - # via -r requirements/./base.txt -llvmlite==0.38.1 - # via - # -r requirements/./base.txt - # numba + # via -r requirements/base.txt logzio-python-handler==4.1.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt luqum==0.8.1 - # via -r requirements/./base.txt -lxml==4.9.1 + # via -r requirements/base.txt +lxml==6.0.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pikepdf # premailer markdown==3.4.1 - # via -r requirements/./base.txt -markupsafe==1.1.1 + # via -r requirements/base.txt +markdown-it-py==4.0.0 + # via rich +markupsafe==3.0.3 # via jinja2 matplotlib-inline==0.1.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython mccabe==0.6.1 # via # flake8 # pylint +mdurl==0.1.2 + # via markdown-it-py +msgpack==1.1.2 + # via cachecontrol multidict==4.7.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt 
# aiohttp # yarl mypy-extensions==0.4.3 - # via - # -r requirements/./base.txt - # black - # typing-inspect -numba==0.55.2 - # via -r requirements/./base.txt -numpy==1.22.4 - # via - # -r requirements/./base.txt - # fasttext - # numba - # opencv-python - # scikit-learn - # scipy + # via black +numpy==2.2.6 + # via -r requirements/base.txt oauthlib==3.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # requests-oauthlib # social-auth-core -opencv-python==4.6.0.66 - # via -r requirements/local.in orderedmultidict==1.0.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # furl +packageurl-python==0.17.6 + # via cyclonedx-python-lib packaging==24.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # black # build - # deprecation # pikepdf + # pip-audit + # pip-requirements-parser # pytest # pytest-sugar # sphinx parso==0.8.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # jedi pathspec==0.9.0 # via black pdfminer-six==20200517 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfplumber pdfplumber==0.5.28 - # via -r requirements/./base.txt + # via -r requirements/base.txt pebble==4.5.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt pexpect==4.8.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython pickleshare==0.7.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython -pikepdf==5.4.0 - # via -r requirements/./base.txt -pillow==9.2.0 +pikepdf==10.5.1 + # via -r requirements/base.txt +pillow==12.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfplumber # pikepdf -pip-tools==7.4.1 +pip-api==0.0.34 + # via pip-audit +pip-audit==2.7.3 + # via -r requirements/local.in +pip-requirements-parser==32.0.1 + # via pip-audit +pip-tools==7.5.3 # via -r requirements/local.in platformdirs==2.5.2 # via @@ -488,72 +515,79 @@ pluggy==0.12.0 # via pytest ply==3.11 # via - # -r requirements/./base.txt + # -r 
requirements/base.txt # luqum premailer==3.0.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-premailer prompt-toolkit==3.0.38 # via - # -r requirements/./base.txt + # -r requirements/base.txt # click-repl # ipython -proto-plus==1.13.0 +propcache==0.4.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # yarl +proto-plus==1.27.1 + # via + # -r requirements/base.txt # google-cloud-language + # google-cloud-pubsub protobuf==4.25.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-api-core + # google-cloud-language # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status # logzio-python-handler # proto-plus psycopg2==2.9.6 # via -r requirements/local.in ptyprocess==0.6.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pexpect pure-eval==0.2.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # stack-data py==1.8.2 # via pytest +py-serializable==1.1.2 + # via cyclonedx-python-lib pyasn1==0.4.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pyasn1-modules - # python-jose - # rsa pyasn1-modules==0.2.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-auth -pybind11==2.6.2 - # via fasttext pycodestyle==2.5.0 # via flake8 pycparser==2.19 # via - # -r requirements/./base.txt + # -r requirements/base.txt # cffi pycryptodome==3.10.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfminer-six pyflakes==2.1.1 # via flake8 -pygments==2.5.2 +pygments==2.20.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython + # rich # sphinx -pyjwt==2.3.0 +pyjwt==2.12.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # djangorestframework-simplejwt # social-auth-core pylint==2.14.5 @@ -570,15 +604,19 @@ pylint-plugin-utils==0.7 # pylint-celery # pylint-django pymupdf==1.25.3 - # via -r requirements/./base.txt + # via -r requirements/base.txt +pyparsing==3.3.2 + # via 
pip-requirements-parser pyproject-hooks==1.2.0 # via # build # pip-tools pyrsistent==0.18.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # jsonschema +pysolr==3.11.0 + # via -r requirements/base.txt pytest==7.1.2 # via # -r requirements/local.in @@ -594,58 +632,53 @@ pytest-sugar==0.9.2 # via -r requirements/local.in pytest-watch==4.2.0 # via -r requirements/local.in -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # botocore # celery # faker -python-jose==3.3.0 - # via - # -r requirements/./base.txt - # social-auth-core -python-levenshtein==0.12.0 +python-levenshtein==0.27.3 # via -r requirements/local.in python-redis-lock[django]==3.3.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt python-slugify==3.0.3 - # via -r requirements/./base.txt + # via -r requirements/base.txt python3-openid==3.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # social-auth-core pytz==2022.1 - # via - # -r requirements/./base.txt - # babel - # djangorestframework - # google-api-core + # via -r requirements/base.txt pyyaml==5.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular - # libcst -rcssmin==1.1.1 +rapidfuzz==3.14.5 + # via levenshtein +rcssmin==1.2.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-compressor redis==3.4.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-redis # fakeredis # python-redis-lock -requests==2.28.1 +requests==2.32.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # cachecontrol # gcsfs # google-api-core + # google-cloud-storage # logzio-python-handler + # pip-audit # premailer # pysolr # requests-mock # requests-oauthlib - # smart-open # social-auth-core # sphinx # squarelet-auth @@ -653,181 +686,160 @@ requests-mock==1.9.3 # via -r requirements/local.in requests-oauthlib==1.2.0 # via - # -r requirements/./base.txt + 
# -r requirements/base.txt # google-auth-oauthlib # social-auth-core -rest-social-auth==8.1.0 - # via -r requirements/./base.txt -rjsmin==1.2.1 +rest-social-auth==9.0.0 + # via -r requirements/base.txt +rich==14.3.3 + # via pip-audit +rjsmin==1.2.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-compressor -rsa==4.0 - # via - # -r requirements/./base.txt - # google-auth - # python-jose rules==2.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt s3transfer==0.5.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # boto3 -scikit-learn==1.1.1 - # via -r requirements/local.in -scipy==1.8.1 - # via - # -r requirements/./base.txt - # scikit-learn -six==1.15.0 +six==1.17.0 # via - # -r requirements/./base.txt - # argon2-cffi + # -r requirements/base.txt # asttokens # bleach # django-appconf - # django-choices # django-coverage-plugin # faker # fakeredis # furl - # google-api-core - # google-auth - # grpcio + # html5lib # jsonschema # orderedmultidict # python-dateutil # requests-mock -smart-open==1.8.4 - # via -r requirements/./base.txt -snowballstemmer==1.9.1 +smart-open==7.5.1 + # via -r requirements/base.txt +snowballstemmer==3.0.1 # via sphinx -social-auth-app-django==5.0.0 +social-auth-app-django==5.6.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # rest-social-auth -social-auth-core[openidconnect]==4.4.2 +social-auth-core[openidconnect]==4.7.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # rest-social-auth # social-auth-app-django # squarelet-auth sortedcontainers==2.4.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # cyclonedx-python-lib # fakeredis # pdfminer-six -sphinx==2.2.0 +sphinx==8.1.3 # via -r requirements/local.in -sphinxcontrib-applehelp==1.0.1 +sphinxcontrib-applehelp==2.0.0 # via sphinx -sphinxcontrib-devhelp==1.0.1 +sphinxcontrib-devhelp==2.0.0 # via sphinx -sphinxcontrib-htmlhelp==1.0.2 +sphinxcontrib-htmlhelp==2.1.0 # via 
sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx -sphinxcontrib-qthelp==1.0.2 +sphinxcontrib-qthelp==2.0.0 # via sphinx -sphinxcontrib-serializinghtml==1.1.3 +sphinxcontrib-serializinghtml==2.0.0 # via sphinx sqlparse==0.4.4 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django # django-debug-toolbar squarelet-auth==0.1.14 - # via -r requirements/./base.txt + # via -r requirements/base.txt stack-data==0.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython termcolor==1.1.0 # via pytest-sugar text-unidecode==1.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # faker # python-slugify -threadpoolctl==2.2.0 - # via scikit-learn toml==0.10.2 - # via ipdb -tomli==2.0.1 # via - # black - # build - # pip-tools - # pylint - # pytest + # ipdb + # pip-audit +tomli==2.0.1 + # via pytest tomlkit==0.11.4 # via pylint traitlets==5.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython # matplotlib-inline typing-extensions==4.15.0 # via - # -r requirements/./base.txt - # asgiref - # black - # libcst - # typing-inspect -typing-inspect==0.6.0 - # via - # -r requirements/./base.txt - # libcst + # -r requirements/base.txt + # grpcio tzdata==2024.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery # kombu unidecode==1.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # -r requirements/local.in uritemplate==4.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular urllib3==1.26.10 # via - # -r requirements/./base.txt + # -r requirements/base.txt # botocore # requests vine==5.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # amqp # celery # kombu wand==0.6.6 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfplumber watchdog==6.0.0 # via pytest-watch wcwidth==0.1.8 # via - # -r requirements/./base.txt + # -r requirements/base.txt # prompt-toolkit webencodings==0.5.1 # via - # -r 
requirements/./base.txt + # -r requirements/base.txt # bleach + # html5lib werkzeug==2.1.2 # via -r requirements/local.in wheel==0.45.1 # via pip-tools -wrapt==1.11.2 +wrapt==1.17.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore # astroid -yarl==1.4.2 + # deprecated + # smart-open +yarl==1.23.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp zipp==3.11.0 # via importlib-metadata diff --git a/requirements/production.in b/requirements/production.in index ec9a1a87..8d3921df 100644 --- a/requirements/production.in +++ b/requirements/production.in @@ -12,4 +12,5 @@ django-storages[boto3] django-anymail[mailgun] django-celery-email +# Documentation drf-spectacular \ No newline at end of file diff --git a/requirements/production.txt b/requirements/production.txt index ded6e691..892e4334 100644 --- a/requirements/production.txt +++ b/requirements/production.txt @@ -1,173 +1,167 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile requirements/production.in # --no-binary psycopg2 --e git+https://github.com/MuckRock/pysolr.git@958db93824d6ba58c3183cee43f8cfd8c26165ba#egg=pysolr - # via -r requirements/./base.txt aioboto3==9.6.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt aiobotocore[boto3]==2.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aioboto3 -aiohttp==3.8.1 +aiohappyeyeballs==2.6.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # aiohttp +aiohttp==3.10.5 + # via + # -r requirements/base.txt # aiobotocore + # gcsfs aioitertools==0.10.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore aiosignal==1.2.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp amqp==5.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # kombu -argon2-cffi==19.1.0 - # via -r 
requirements/./base.txt -asgiref==3.7.2 +argon2-cffi==25.1.0 + # via -r requirements/base.txt +argon2-cffi-bindings==25.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # argon2-cffi +asgiref==3.11.1 + # via + # -r requirements/base.txt # django # scout-apm asttokens==2.0.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # stack-data -async-timeout==4.0.2 - # via - # -r requirements/./base.txt - # aiohttp attrs==19.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp # jsonschema backcall==0.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython billiard==4.2.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery bleach==6.0.0 - # via -r requirements/./base.txt -boto==2.49.0 - # via - # -r requirements/./base.txt - # smart-open + # via -r requirements/base.txt boto3==1.21.21 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore # django-storages - # smart-open botocore==1.24.21 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore # boto3 # s3transfer -cachetools==3.1.1 - # via - # -r requirements/./base.txt - # google-auth celery==5.4.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-celery-email # squarelet-auth certifi==2019.6.16 # via - # -r requirements/./base.txt + # -r requirements/base.txt # requests # sentry-sdk # urllib3 -cffi==1.15.1 +cffi==2.0.0 # via - # -r requirements/./base.txt - # argon2-cffi + # -r requirements/base.txt + # argon2-cffi-bindings # cryptography chardet==3.0.4 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfminer-six charset-normalizer==2.1.0 # via - # -r requirements/./base.txt - # aiohttp + # -r requirements/base.txt # requests click==8.1.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery # click-didyoumean # click-plugins # click-repl click-didyoumean==0.3.1 # via - # -r 
requirements/./base.txt + # -r requirements/base.txt # celery click-plugins==1.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery click-repl==0.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery collectfast==2.2.0 # via -r requirements/production.in cpuprofile==1.0.1 - # via -r requirements/./base.txt -cryptography==35.0.0 + # via -r requirements/base.txt +cryptography==46.0.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-anymail + # google-auth # pyopenssl # social-auth-core # urllib3 cssselect==1.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # premailer cssutils==1.0.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # premailer daily-active-users==0.1.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt decorator==4.4.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs # ipython -defusedxml==0.6.0 +defusedxml==0.7.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # python3-openid # social-auth-core -deprecation==2.1.0 +deprecated==1.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pikepdf -django==4.2.2 +django==5.2.12 # via - # -r requirements/./base.txt + # -r requirements/base.txt # collectfast # daily-active-users # django-anymail # django-appconf # django-celery-email - # django-choices + # django-compressor # django-cors-headers # django-debug-toolbar # django-extensions @@ -183,268 +177,284 @@ django==4.2.2 # drf-nested-routers # drf-spectacular # rest-social-auth + # social-auth-app-django # squarelet-auth django-anymail[mailgun]==10.0 # via -r requirements/production.in django-appconf==1.0.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-celery-email # django-compressor django-autoslug==1.9.8 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-celery-email==3.0.0 # via -r requirements/production.in 
-django-choices==1.7.2 - # via -r requirements/./base.txt -django-compressor==4.4 - # via -r requirements/./base.txt +django-compressor==4.6.0 + # via -r requirements/base.txt django-cors-headers==3.13.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-cprofile-middleware==1.0.5 - # via -r requirements/./base.txt -django-debug-toolbar==4.1.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt +django-debug-toolbar==6.2.0 + # via -r requirements/base.txt django-environ==0.4.5 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-extensions==3.2.3 - # via -r requirements/./base.txt -django-filter==21.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt +django-filter==25.2 + # via -r requirements/base.txt django-ipware==4.0.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-model-utils==3.2.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-parler==2.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-parler-rest django-parler-rest==2.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-premailer==0.2.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-redis==5.0.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # python-redis-lock -django-robots==5.0 - # via -r requirements/./base.txt +django-robots==6.1 + # via -r requirements/base.txt django-storages[boto3]==1.12.3 # via # -r requirements/production.in # collectfast -djangorestframework==3.14.0 +djangorestframework==3.16.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-parler-rest # djangorestframework-simplejwt # drf-nested-routers # drf-spectacular # rest-social-auth -djangorestframework-simplejwt==4.8.0 - # via -r requirements/./base.txt +djangorestframework-simplejwt==5.5.1 + # via -r requirements/base.txt dogslow==1.2 - # via -r requirements/./base.txt + # via 
-r requirements/base.txt drf-flex-fields==1.0.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt drf-nested-routers==0.91 - # via -r requirements/./base.txt + # via -r requirements/base.txt drf-spectacular==0.28.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # -r requirements/production.in -ecdsa==0.13.2 - # via - # -r requirements/./base.txt - # python-jose executing==0.8.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # stack-data -frozenlist==1.3.0 +frozenlist==1.8.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp # aiosignal -fsspec==0.5.2 +fsspec==2025.10.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs furl==2.1.0 - # via -r requirements/./base.txt -gcsfs==0.3.1 - # via -r requirements/./base.txt -google-api-core[grpc]==1.23.0 + # via -r requirements/base.txt +gcsfs==2025.10.0 + # via -r requirements/base.txt +google-api-core[grpc]==2.17.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # google-cloud-core # google-cloud-language # google-cloud-pubsub -google-auth==1.23.0 + # google-cloud-storage +google-auth==2.49.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs # google-api-core # google-auth-oauthlib -google-auth-oauthlib==0.4.1 + # google-cloud-core + # google-cloud-language + # google-cloud-storage +google-auth-oauthlib==1.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs -google-cloud-language==2.0.0 - # via -r requirements/./base.txt -google-cloud-pubsub==1.0.2 - # via -r requirements/./base.txt -googleapis-common-protos[grpc]==1.6.0 +google-cloud-core==2.5.1 + # via + # -r requirements/base.txt + # google-cloud-storage +google-cloud-language==2.19.0 + # via -r requirements/base.txt +google-cloud-pubsub==2.10.0 + # via -r requirements/base.txt +google-cloud-storage==3.4.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # gcsfs 
+google-crc32c==1.8.0 + # via + # -r requirements/base.txt + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.8.2 + # via + # -r requirements/base.txt + # google-cloud-storage +googleapis-common-protos[grpc]==1.73.0 + # via + # -r requirements/base.txt # google-api-core # grpc-google-iam-v1 -grpc-google-iam-v1==0.12.3 + # grpcio-status +grpc-google-iam-v1==0.12.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-cloud-pubsub -grpcio==1.47.0 +grpcio==1.80.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-api-core + # google-cloud-language + # google-cloud-pubsub # googleapis-common-protos # grpc-google-iam-v1 + # grpcio-status +grpcio-status==1.63.0rc1 + # via + # -r requirements/base.txt + # google-api-core + # google-cloud-pubsub gunicorn==20.1.0 # via -r requirements/production.in html2text==2020.1.16 - # via -r requirements/./base.txt + # via -r requirements/base.txt idna==2.8 # via - # -r requirements/./base.txt + # -r requirements/base.txt # requests # urllib3 # yarl inflection==0.5.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular ipython==8.5.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt jedi==0.18.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython jmespath==0.9.4 # via - # -r requirements/./base.txt + # -r requirements/base.txt # boto3 # botocore jsonschema==3.2.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular kombu==5.4.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery -libcst==0.4.1 - # via - # -r requirements/./base.txt - # google-cloud-language listcrunch==1.0.0 - # via -r requirements/./base.txt -llvmlite==0.38.1 - # via - # -r requirements/./base.txt - # numba + # via -r requirements/base.txt logzio-python-handler==4.1.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt luqum==0.8.1 - # via -r 
requirements/./base.txt -lxml==4.9.1 + # via -r requirements/base.txt +lxml==6.0.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pikepdf # premailer markdown==3.4.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt matplotlib-inline==0.1.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython multidict==4.7.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp # yarl -mypy-extensions==0.4.3 - # via - # -r requirements/./base.txt - # typing-inspect -numba==0.55.2 - # via -r requirements/./base.txt -numpy==1.22.4 - # via - # -r requirements/./base.txt - # numba - # scipy +numpy==2.2.6 + # via -r requirements/base.txt oauthlib==3.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # requests-oauthlib # social-auth-core orderedmultidict==1.0.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # furl packaging==24.1 # via - # -r requirements/./base.txt - # deprecation + # -r requirements/base.txt # pikepdf parso==0.8.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # jedi pdfminer-six==20200517 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfplumber pdfplumber==0.5.28 - # via -r requirements/./base.txt + # via -r requirements/base.txt pebble==4.5.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt pexpect==4.8.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython pickleshare==0.7.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython -pikepdf==5.4.0 - # via -r requirements/./base.txt -pillow==9.2.0 +pikepdf==10.5.1 + # via -r requirements/base.txt +pillow==12.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfplumber # pikepdf ply==3.11 # via - # -r requirements/./base.txt + # -r requirements/base.txt # luqum premailer==3.0.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-premailer 
prompt-toolkit==3.0.38 # via - # -r requirements/./base.txt + # -r requirements/base.txt # click-repl # ipython -proto-plus==1.13.0 +propcache==0.4.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # yarl +proto-plus==1.27.1 + # via + # -r requirements/base.txt # google-cloud-language + # google-cloud-pubsub protobuf==4.25.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-api-core + # google-cloud-language # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status # logzio-python-handler # proto-plus psutil==5.7.3 @@ -453,201 +463,175 @@ psycopg2==2.9.6 # via -r requirements/production.in ptyprocess==0.6.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pexpect pure-eval==0.2.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # stack-data pyasn1==0.4.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pyasn1-modules - # python-jose - # rsa pyasn1-modules==0.2.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-auth pycparser==2.19 # via - # -r requirements/./base.txt + # -r requirements/base.txt # cffi pycryptodome==3.10.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfminer-six -pygments==2.5.2 +pygments==2.20.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython -pyjwt==2.3.0 +pyjwt==2.12.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # djangorestframework-simplejwt # social-auth-core pymupdf==1.25.3 - # via -r requirements/./base.txt + # via -r requirements/base.txt pyopenssl==19.1.0 # via urllib3 pyrsistent==0.18.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # jsonschema -python-dateutil==2.8.2 +pysolr==3.11.0 + # via -r requirements/base.txt +python-dateutil==2.9.0.post0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # botocore # celery -python-jose==3.3.0 - # via - # -r requirements/./base.txt - # social-auth-core 
python-redis-lock[django]==3.3.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt python-slugify==3.0.3 - # via -r requirements/./base.txt + # via -r requirements/base.txt python3-openid==3.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # social-auth-core pytz==2022.1 - # via - # -r requirements/./base.txt - # djangorestframework - # google-api-core + # via -r requirements/base.txt pyyaml==5.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular - # libcst -rcssmin==1.1.1 +rcssmin==1.2.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-compressor redis==3.4.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-redis # python-redis-lock -requests==2.28.1 +requests==2.32.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-anymail # gcsfs # google-api-core + # google-cloud-storage # logzio-python-handler # premailer # pysolr # requests-oauthlib - # smart-open # social-auth-core # squarelet-auth requests-oauthlib==1.2.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-auth-oauthlib # social-auth-core -rest-social-auth==8.1.0 - # via -r requirements/./base.txt -rjsmin==1.2.1 +rest-social-auth==9.0.0 + # via -r requirements/base.txt +rjsmin==1.2.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-compressor -rsa==4.0 - # via - # -r requirements/./base.txt - # google-auth - # python-jose rules==2.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt s3transfer==0.5.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # boto3 -scipy==1.8.1 - # via -r requirements/./base.txt scout-apm==2.17.0 # via -r requirements/production.in sentry-sdk==1.0.0 # via -r requirements/production.in -six==1.15.0 +six==1.17.0 # via - # -r requirements/./base.txt - # argon2-cffi + # -r requirements/base.txt # asttokens # bleach # django-appconf - # django-choices # 
furl - # google-api-core - # google-auth - # grpcio # jsonschema # orderedmultidict # pyopenssl # python-dateutil -smart-open==1.8.4 - # via -r requirements/./base.txt -social-auth-app-django==5.0.0 +smart-open==7.5.1 + # via -r requirements/base.txt +social-auth-app-django==5.6.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # rest-social-auth -social-auth-core[openidconnect]==4.4.2 +social-auth-core[openidconnect]==4.7.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # rest-social-auth # social-auth-app-django # squarelet-auth sortedcontainers==2.4.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfminer-six sqlparse==0.4.4 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django # django-debug-toolbar squarelet-auth==0.1.14 - # via -r requirements/./base.txt + # via -r requirements/base.txt stack-data==0.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython text-unidecode==1.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # python-slugify traitlets==5.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython # matplotlib-inline typing-extensions==4.15.0 # via - # -r requirements/./base.txt - # asgiref + # -r requirements/base.txt # collectfast - # libcst - # typing-inspect -typing-inspect==0.6.0 - # via - # -r requirements/./base.txt - # libcst + # grpcio tzdata==2024.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery # kombu unidecode==1.1.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt uritemplate==4.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular urllib3[secure]==1.26.10 # via - # -r requirements/./base.txt + # -r requirements/base.txt # botocore # django-anymail # requests @@ -655,30 +639,32 @@ urllib3[secure]==1.26.10 # sentry-sdk vine==5.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # amqp # celery # 
kombu wand==0.6.6 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfplumber wcwidth==0.1.8 # via - # -r requirements/./base.txt + # -r requirements/base.txt # prompt-toolkit webencodings==0.5.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # bleach -wrapt==1.11.2 +wrapt==1.17.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore + # deprecated # scout-apm -yarl==1.4.2 + # smart-open +yarl==1.23.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp # The following packages are considered to be unsafe in a requirements file: From a65be7169d53cc6301bd6b055d293bb7ce949d8a Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Wed, 8 Apr 2026 20:25:56 -0500 Subject: [PATCH 02/40] Getting dependencies up to date --- compose/local/django/Dockerfile | 7 +- compose/production/django/Dockerfile | 4 +- requirements/base.in | 28 ++--- requirements/base.txt | 106 ++++++++++++------- requirements/local.in | 2 +- requirements/local.txt | 150 ++++++++++++++++----------- requirements/production.txt | 141 +++++++++++++++---------- 7 files changed, 264 insertions(+), 174 deletions(-) diff --git a/compose/local/django/Dockerfile b/compose/local/django/Dockerfile index 3d1f9941..e895be46 100644 --- a/compose/local/django/Dockerfile +++ b/compose/local/django/Dockerfile @@ -20,8 +20,8 @@ RUN curl http://localhost/rootCA.pem > /usr/local/share/ca-certificates/rootCA.c # Requirements are installed here to ensure they will be cached. 
COPY ./requirements /requirements -# RUN pip install --upgrade pip && pip install -r /requirements/local.txt -RUN pip install -r /requirements/local.txt + +RUN pip install --upgrade "pip>=26,<27" && pip install -r /requirements/local.txt COPY ./compose/production/django/entrypoint /entrypoint RUN sed -i 's/\r//' /entrypoint && chmod +x /entrypoint @@ -42,9 +42,6 @@ RUN sed -i 's/\r//' /start-flower && chmod +x /start-flower # Entry point # =-=-=-=-=-= -# Temporary measure to get pip-compile to work -# RUN pip install 'pip<19.2' - WORKDIR /app ENV LD_LIBRARY_PATH /app/documentcloud/documents/processing/ocr/tesseract diff --git a/compose/production/django/Dockerfile b/compose/production/django/Dockerfile index 6cea765b..874943e9 100644 --- a/compose/production/django/Dockerfile +++ b/compose/production/django/Dockerfile @@ -12,9 +12,9 @@ RUN apt-get -qq -y update && apt-get -qq -y install \ RUN groupadd -r django \ && useradd -r -g django django -# Requirements are installed here to ensure they will be cached. +# Requirements are installed here to ensure they will be cached. 
COPY ./requirements /requirements -RUN pip install --no-cache-dir -r /requirements/production.txt \ +RUN pip install --upgrade "pip>=26,<27" && pip install --no-cache-dir -r /requirements/production.txt \ && rm -rf /requirements COPY ./compose/production/django/entrypoint /entrypoint diff --git a/requirements/base.in b/requirements/base.in index 7c5d884c..7f3f379f 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,6 +1,6 @@ pytz python-slugify -Pillow==12.1.1 +Pillow rcssmin argon2-cffi redis @@ -18,46 +18,45 @@ daily-active-users # Django # ------------------------------------------------------------------------------ -django==5.2.12 +django<6 django-environ django-model-utils -django-compressor>=4.5 +django-compressor django-redis -django-filter>=23.0 +django-filter django-extensions django-autoslug django-premailer rules squarelet-auth dogslow -django-debug-toolbar==6.2.0 +django-debug-toolbar django-cprofile-middleware django-robots django-ipware django-parler # Django REST Framework -djangorestframework==3.16.1 -rest-social-auth==9.0.0 +djangorestframework +rest-social-auth social-auth-core django-cors-headers -djangorestframework_simplejwt==5.5.1 +djangorestframework_simplejwt drf-nested-routers -cryptography==46.0.0 # support RS256 for JWT +cryptography>=46.0.7 # support RS256 for JWT drf-flex-fields django-parler-rest # Cloud providers -gcsfs>=2023.6.0 +gcsfs google-cloud-pubsub -boto3 smart-open -aioboto3 +aioboto3>=15 +boto3 # to resolve version issues wrapt>=1.14.0,<2 - # Processing cpuprofile listcrunch @@ -71,7 +70,7 @@ pysolr luqum # entity extraction -google-cloud-language>=2.11.0 +google-cloud-language # PDF pdfplumber @@ -86,3 +85,4 @@ drf-spectacular # Python 2 compatibility some packages still require six==1.17.0 + diff --git a/requirements/base.txt b/requirements/base.txt index 5bf4a6d9..65fbdd12 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,19 +4,21 @@ # # pip-compile requirements/base.in # 
-aioboto3==9.6.0 +aioboto3==15.5.0 # via -r requirements/base.in -aiobotocore[boto3]==2.3.0 +aiobotocore[boto3]==2.25.1 + # via aioboto3 +aiofiles==25.1.0 # via aioboto3 aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.10.5 +aiohttp==3.13.5 # via # aiobotocore # gcsfs aioitertools==0.10.0 # via aiobotocore -aiosignal==1.2.0 +aiosignal==1.4.0 # via aiohttp amqp==5.3.1 # via kombu @@ -38,11 +40,11 @@ billiard==4.2.1 # via celery bleach==6.0.0 # via -r requirements/base.in -boto3==1.21.21 +boto3==1.40.61 # via # -r requirements/base.in # aiobotocore -botocore==1.24.21 +botocore==1.40.61 # via # aiobotocore # boto3 @@ -51,16 +53,16 @@ celery==5.4.0 # via # -r requirements/base.in # squarelet-auth -certifi==2019.6.16 +certifi==2026.2.25 # via requests cffi==2.0.0 # via # argon2-cffi-bindings # cryptography -chardet==3.0.4 - # via pdfminer-six charset-normalizer==2.1.0 - # via requests + # via + # pdfminer-six + # requests click==8.1.7 # via # celery @@ -75,10 +77,11 @@ click-repl==0.3.0 # via celery cpuprofile==1.0.1 # via -r requirements/base.in -cryptography==46.0.0 +cryptography==46.0.7 # via # -r requirements/base.in # google-auth + # pdfminer-six # social-auth-core cssselect==1.1.0 # via premailer @@ -96,7 +99,7 @@ defusedxml==0.7.1 # social-auth-core deprecated==1.3.1 # via pikepdf -django==5.2.12 +django==5.2.13 # via # -r requirements/base.in # daily-active-users @@ -178,18 +181,19 @@ frozenlist==1.8.0 # via # aiohttp # aiosignal -fsspec==2025.10.0 +fsspec==2026.3.0 # via gcsfs furl==2.1.0 # via -r requirements/base.in -gcsfs==2025.10.0 +gcsfs==2026.3.0 # via -r requirements/base.in -google-api-core[grpc]==2.17.1 +google-api-core[grpc]==2.29.0 # via # google-cloud-core # google-cloud-language # google-cloud-pubsub # google-cloud-storage + # google-cloud-storage-control google-auth==2.49.1 # via # gcsfs @@ -197,16 +201,20 @@ google-auth==2.49.1 # google-auth-oauthlib # google-cloud-core # google-cloud-language + # google-cloud-pubsub # google-cloud-storage + # 
google-cloud-storage-control google-auth-oauthlib==1.3.1 # via gcsfs google-cloud-core==2.5.1 # via google-cloud-storage google-cloud-language==2.19.0 # via -r requirements/base.in -google-cloud-pubsub==2.10.0 +google-cloud-pubsub==2.36.0 # via -r requirements/base.in -google-cloud-storage==3.4.1 +google-cloud-storage==3.10.1 + # via gcsfs +google-cloud-storage-control==1.6.1 # via gcsfs google-crc32c==1.8.0 # via @@ -219,7 +227,7 @@ googleapis-common-protos[grpc]==1.73.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -grpc-google-iam-v1==0.12.7 +grpc-google-iam-v1==0.14.4 # via google-cloud-pubsub grpcio==1.80.0 # via @@ -235,18 +243,21 @@ grpcio-status==1.63.0rc1 # google-cloud-pubsub html2text==2020.1.16 # via -r requirements/base.in -idna==2.8 +idna==3.7 # via # requests # yarl +importlib-metadata==8.4.0 + # via opentelemetry-api inflection==0.5.1 # via drf-spectacular -ipython==8.5.0 +ipython==8.10.0 # via -r requirements/base.in jedi==0.18.1 # via ipython jmespath==0.9.4 # via + # aiobotocore # boto3 # botocore jsonschema==3.2.0 @@ -257,7 +268,7 @@ kombu==5.4.2 # via celery listcrunch==1.0.0 # via -r requirements/base.in -logzio-python-handler==4.1.1 +logzio-python-handler==4.1.9 # via -r requirements/base.in luqum==0.8.1 # via -r requirements/base.in @@ -265,12 +276,13 @@ lxml==6.0.2 # via # pikepdf # premailer -markdown==3.4.1 +markdown==3.8.1 # via -r requirements/base.in matplotlib-inline==0.1.3 # via ipython -multidict==4.7.5 +multidict==6.7.1 # via + # aiobotocore # aiohttp # yarl numpy==2.2.6 @@ -279,15 +291,24 @@ oauthlib==3.1.0 # via # requests-oauthlib # social-auth-core +opentelemetry-api==1.40.0 + # via + # google-cloud-pubsub + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-sdk==1.40.0 + # via google-cloud-pubsub +opentelemetry-semantic-conventions==0.61b0 + # via opentelemetry-sdk orderedmultidict==1.0.1 # via furl packaging==24.1 # via pikepdf parso==0.8.3 # via jedi -pdfminer-six==20200517 
+pdfminer-six==20251230 # via pdfplumber -pdfplumber==0.5.28 +pdfplumber==0.11.9 # via -r requirements/base.in pebble==4.5.0 # via -r requirements/base.in @@ -311,15 +332,21 @@ prompt-toolkit==3.0.38 # click-repl # ipython propcache==0.4.1 - # via yarl + # via + # aiohttp + # yarl proto-plus==1.27.1 # via + # google-api-core # google-cloud-language # google-cloud-pubsub -protobuf==4.25.2 + # google-cloud-storage-control +protobuf==5.29.6 # via # google-api-core # google-cloud-language + # google-cloud-pubsub + # google-cloud-storage-control # googleapis-common-protos # grpc-google-iam-v1 # grpcio-status @@ -335,8 +362,6 @@ pyasn1-modules==0.2.7 # via google-auth pycparser==2.19 # via cffi -pycryptodome==3.10.1 - # via pdfminer-six pygments==2.20.0 # via ipython pyjwt==2.12.1 @@ -345,12 +370,15 @@ pyjwt==2.12.1 # social-auth-core pymupdf==1.25.3 # via -r requirements/base.in +pypdfium2==5.7.0 + # via pdfplumber pyrsistent==0.18.0 # via jsonschema pysolr==3.11.0 # via -r requirements/base.in python-dateutil==2.9.0.post0 # via + # aiobotocore # botocore # celery python-redis-lock[django]==3.3.1 @@ -361,7 +389,7 @@ python3-openid==3.1.0 # via social-auth-core pytz==2022.1 # via -r requirements/base.in -pyyaml==5.3.1 +pyyaml==6.0.3 # via drf-spectacular rcssmin==1.2.2 # via @@ -372,7 +400,7 @@ redis==3.4.1 # -r requirements/base.in # django-redis # python-redis-lock -requests==2.32.5 +requests==2.33.0 # via # gcsfs # google-api-core @@ -393,7 +421,7 @@ rjsmin==1.2.5 # via django-compressor rules==2.1 # via -r requirements/base.in -s3transfer==0.5.2 +s3transfer==0.14.0 # via boto3 six==1.17.0 # via @@ -415,9 +443,7 @@ social-auth-core[openidconnect]==4.7.0 # rest-social-auth # social-auth-app-django # squarelet-auth -sortedcontainers==2.4.0 - # via pdfminer-six -sqlparse==0.4.4 +sqlparse==0.5.4 # via # django # django-debug-toolbar @@ -434,7 +460,11 @@ traitlets==5.3.0 typing-extensions==4.15.0 # via # -r requirements/base.in + # aiosignal # grpcio + # opentelemetry-api 
+ # opentelemetry-sdk + # opentelemetry-semantic-conventions tzdata==2024.2 # via # celery @@ -443,7 +473,7 @@ unidecode==1.1.1 # via -r requirements/base.in uritemplate==4.1.1 # via drf-spectacular -urllib3==1.26.10 +urllib3==2.6.3 # via # botocore # requests @@ -452,8 +482,6 @@ vine==5.1.0 # amqp # celery # kombu -wand==0.6.6 - # via pdfplumber wcwidth==0.1.8 # via prompt-toolkit webencodings==0.5.1 @@ -466,6 +494,8 @@ wrapt==1.17.3 # smart-open yarl==1.23.0 # via aiohttp +zipp==3.19.1 + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/requirements/local.in b/requirements/local.in index b74f2fef..c6daa9b8 100644 --- a/requirements/local.in +++ b/requirements/local.in @@ -2,7 +2,7 @@ Werkzeug ipdb -Sphinx>8 +Sphinx psycopg2 --no-binary psycopg2 pip-tools pip-audit diff --git a/requirements/local.txt b/requirements/local.txt index c6818566..6e104b44 100644 --- a/requirements/local.txt +++ b/requirements/local.txt @@ -8,9 +8,13 @@ -e git+https://github.com/elritsch/python-sharedmock#egg=sharedmock # via -r requirements/local.in -aioboto3==9.6.0 +aioboto3==15.5.0 # via -r requirements/base.txt -aiobotocore[boto3]==2.3.0 +aiobotocore[boto3]==2.25.1 + # via + # -r requirements/base.txt + # aioboto3 +aiofiles==25.1.0 # via # -r requirements/base.txt # aioboto3 @@ -18,7 +22,7 @@ aiohappyeyeballs==2.6.1 # via # -r requirements/base.txt # aiohttp -aiohttp==3.10.5 +aiohttp==3.13.5 # via # -r requirements/base.txt # aiobotocore @@ -27,7 +31,7 @@ aioitertools==0.10.0 # via # -r requirements/base.txt # aiobotocore -aiosignal==1.2.0 +aiosignal==1.4.0 # via # -r requirements/base.txt # aiohttp @@ -60,7 +64,6 @@ attrs==19.3.0 # -r requirements/base.txt # aiohttp # jsonschema - # pytest babel==2.18.0 # via sphinx backcall==0.1.0 @@ -71,17 +74,17 @@ billiard==4.2.1 # via # -r requirements/base.txt # celery -black==24.8.0 +black==26.3.1 # via -r requirements/local.in bleach==6.0.0 # via -r 
requirements/base.txt boolean-py==5.0 # via license-expression -boto3==1.21.21 +boto3==1.40.61 # via # -r requirements/base.txt # aiobotocore -botocore==1.24.21 +botocore==1.40.61 # via # -r requirements/base.txt # aiobotocore @@ -97,7 +100,7 @@ celery==5.4.0 # via # -r requirements/base.txt # squarelet-auth -certifi==2019.6.16 +certifi==2026.2.25 # via # -r requirements/base.txt # requests @@ -106,13 +109,10 @@ cffi==2.0.0 # -r requirements/base.txt # argon2-cffi-bindings # cryptography -chardet==3.0.4 - # via - # -r requirements/base.txt - # pdfminer-six charset-normalizer==2.1.0 # via # -r requirements/base.txt + # pdfminer-six # requests click==8.1.7 # via @@ -143,10 +143,11 @@ coverage==4.5.4 # django-coverage-plugin cpuprofile==1.0.1 # via -r requirements/base.txt -cryptography==46.0.0 +cryptography==46.0.7 # via # -r requirements/base.txt # google-auth + # pdfminer-six # social-auth-core cssselect==1.1.0 # via @@ -178,7 +179,7 @@ deprecated==1.3.1 # pikepdf dill==0.3.5.1 # via pylint -django==5.2.12 +django==5.2.13 # via # -r requirements/base.txt # daily-active-users @@ -285,21 +286,22 @@ frozenlist==1.8.0 # -r requirements/base.txt # aiohttp # aiosignal -fsspec==2025.10.0 +fsspec==2026.3.0 # via # -r requirements/base.txt # gcsfs furl==2.1.0 # via -r requirements/base.txt -gcsfs==2025.10.0 +gcsfs==2026.3.0 # via -r requirements/base.txt -google-api-core[grpc]==2.17.1 +google-api-core[grpc]==2.29.0 # via # -r requirements/base.txt # google-cloud-core # google-cloud-language # google-cloud-pubsub # google-cloud-storage + # google-cloud-storage-control google-auth==2.49.1 # via # -r requirements/base.txt @@ -308,7 +310,9 @@ google-auth==2.49.1 # google-auth-oauthlib # google-cloud-core # google-cloud-language + # google-cloud-pubsub # google-cloud-storage + # google-cloud-storage-control google-auth-oauthlib==1.3.1 # via # -r requirements/base.txt @@ -319,9 +323,13 @@ google-cloud-core==2.5.1 # google-cloud-storage google-cloud-language==2.19.0 # via -r 
requirements/base.txt -google-cloud-pubsub==2.10.0 +google-cloud-pubsub==2.36.0 # via -r requirements/base.txt -google-cloud-storage==3.4.1 +google-cloud-storage==3.10.1 + # via + # -r requirements/base.txt + # gcsfs +google-cloud-storage-control==1.6.1 # via # -r requirements/base.txt # gcsfs @@ -340,7 +348,7 @@ googleapis-common-protos[grpc]==1.73.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -grpc-google-iam-v1==0.12.7 +grpc-google-iam-v1==0.14.4 # via # -r requirements/base.txt # google-cloud-pubsub @@ -362,15 +370,17 @@ html2text==2020.1.16 # via -r requirements/base.txt html5lib==1.1 # via pip-audit -idna==2.8 +idna==3.7 # via # -r requirements/base.txt # requests # yarl imagesize==2.0.0 # via sphinx -importlib-metadata==6.0.0 - # via pluggy +importlib-metadata==8.4.0 + # via + # -r requirements/base.txt + # opentelemetry-api inflection==0.5.1 # via # -r requirements/base.txt @@ -379,7 +389,7 @@ iniconfig==1.1.1 # via pytest ipdb==0.13.9 # via -r requirements/local.in -ipython==8.5.0 +ipython==8.10.0 # via # -r requirements/base.txt # ipdb @@ -394,6 +404,7 @@ jinja2==3.1.6 jmespath==0.9.4 # via # -r requirements/base.txt + # aiobotocore # boto3 # botocore jsonschema==3.2.0 @@ -412,7 +423,7 @@ license-expression==30.4.4 # via cyclonedx-python-lib listcrunch==1.0.0 # via -r requirements/base.txt -logzio-python-handler==4.1.1 +logzio-python-handler==4.1.9 # via -r requirements/base.txt luqum==0.8.1 # via -r requirements/base.txt @@ -421,12 +432,14 @@ lxml==6.0.2 # -r requirements/base.txt # pikepdf # premailer -markdown==3.4.1 +markdown==3.8.1 # via -r requirements/base.txt markdown-it-py==4.0.0 # via rich markupsafe==3.0.3 - # via jinja2 + # via + # jinja2 + # werkzeug matplotlib-inline==0.1.3 # via # -r requirements/base.txt @@ -439,9 +452,10 @@ mdurl==0.1.2 # via markdown-it-py msgpack==1.1.2 # via cachecontrol -multidict==4.7.5 +multidict==6.7.1 # via # -r requirements/base.txt + # aiobotocore # aiohttp # yarl mypy-extensions==0.4.3 @@ -453,6 
+467,20 @@ oauthlib==3.1.0 # -r requirements/base.txt # requests-oauthlib # social-auth-core +opentelemetry-api==1.40.0 + # via + # -r requirements/base.txt + # google-cloud-pubsub + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-sdk==1.40.0 + # via + # -r requirements/base.txt + # google-cloud-pubsub +opentelemetry-semantic-conventions==0.61b0 + # via + # -r requirements/base.txt + # opentelemetry-sdk orderedmultidict==1.0.1 # via # -r requirements/base.txt @@ -470,17 +498,18 @@ packaging==24.1 # pytest # pytest-sugar # sphinx + # wheel parso==0.8.3 # via # -r requirements/base.txt # jedi -pathspec==0.9.0 +pathspec==1.0.4 # via black -pdfminer-six==20200517 +pdfminer-six==20251230 # via # -r requirements/base.txt # pdfplumber -pdfplumber==0.5.28 +pdfplumber==0.11.9 # via -r requirements/base.txt pebble==4.5.0 # via -r requirements/base.txt @@ -511,7 +540,7 @@ platformdirs==2.5.2 # via # black # pylint -pluggy==0.12.0 +pluggy==1.6.0 # via pytest ply==3.11 # via @@ -529,17 +558,22 @@ prompt-toolkit==3.0.38 propcache==0.4.1 # via # -r requirements/base.txt + # aiohttp # yarl proto-plus==1.27.1 # via # -r requirements/base.txt + # google-api-core # google-cloud-language # google-cloud-pubsub -protobuf==4.25.2 + # google-cloud-storage-control +protobuf==5.29.6 # via # -r requirements/base.txt # google-api-core # google-cloud-language + # google-cloud-pubsub + # google-cloud-storage-control # googleapis-common-protos # grpc-google-iam-v1 # grpcio-status @@ -555,8 +589,6 @@ pure-eval==0.2.2 # via # -r requirements/base.txt # stack-data -py==1.8.2 - # via pytest py-serializable==1.1.2 # via cyclonedx-python-lib pyasn1==0.4.7 @@ -573,16 +605,13 @@ pycparser==2.19 # via # -r requirements/base.txt # cffi -pycryptodome==3.10.1 - # via - # -r requirements/base.txt - # pdfminer-six pyflakes==2.1.1 # via flake8 pygments==2.20.0 # via # -r requirements/base.txt # ipython + # pytest # rich # sphinx pyjwt==2.12.1 @@ -607,6 +636,10 @@ pymupdf==1.25.3 # via 
-r requirements/base.txt pyparsing==3.3.2 # via pip-requirements-parser +pypdfium2==5.7.0 + # via + # -r requirements/base.txt + # pdfplumber pyproject-hooks==1.2.0 # via # build @@ -617,7 +650,7 @@ pyrsistent==0.18.0 # jsonschema pysolr==3.11.0 # via -r requirements/base.txt -pytest==7.1.2 +pytest==9.0.3 # via # -r requirements/local.in # pytest-django @@ -635,6 +668,7 @@ pytest-watch==4.2.0 python-dateutil==2.9.0.post0 # via # -r requirements/base.txt + # aiobotocore # botocore # celery # faker @@ -648,9 +682,11 @@ python3-openid==3.1.0 # via # -r requirements/base.txt # social-auth-core +pytokens==0.4.1 + # via black pytz==2022.1 # via -r requirements/base.txt -pyyaml==5.3.1 +pyyaml==6.0.3 # via # -r requirements/base.txt # drf-spectacular @@ -666,7 +702,7 @@ redis==3.4.1 # django-redis # fakeredis # python-redis-lock -requests==2.32.5 +requests==2.33.0 # via # -r requirements/base.txt # cachecontrol @@ -699,7 +735,7 @@ rjsmin==1.2.5 # django-compressor rules==2.1 # via -r requirements/base.txt -s3transfer==0.5.2 +s3transfer==0.14.0 # via # -r requirements/base.txt # boto3 @@ -734,10 +770,8 @@ social-auth-core[openidconnect]==4.7.0 # squarelet-auth sortedcontainers==2.4.0 # via - # -r requirements/base.txt # cyclonedx-python-lib # fakeredis - # pdfminer-six sphinx==8.1.3 # via -r requirements/local.in sphinxcontrib-applehelp==2.0.0 @@ -752,7 +786,7 @@ sphinxcontrib-qthelp==2.0.0 # via sphinx sphinxcontrib-serializinghtml==2.0.0 # via sphinx -sqlparse==0.4.4 +sqlparse==0.5.4 # via # -r requirements/base.txt # django @@ -774,8 +808,6 @@ toml==0.10.2 # via # ipdb # pip-audit -tomli==2.0.1 - # via pytest tomlkit==0.11.4 # via pylint traitlets==5.3.0 @@ -786,7 +818,11 @@ traitlets==5.3.0 typing-extensions==4.15.0 # via # -r requirements/base.txt + # aiosignal # grpcio + # opentelemetry-api + # opentelemetry-sdk + # opentelemetry-semantic-conventions tzdata==2024.2 # via # -r requirements/base.txt @@ -800,7 +836,7 @@ uritemplate==4.1.1 # via # -r requirements/base.txt 
# drf-spectacular -urllib3==1.26.10 +urllib3==2.6.3 # via # -r requirements/base.txt # botocore @@ -811,10 +847,6 @@ vine==5.1.0 # amqp # celery # kombu -wand==0.6.6 - # via - # -r requirements/base.txt - # pdfplumber watchdog==6.0.0 # via pytest-watch wcwidth==0.1.8 @@ -826,9 +858,9 @@ webencodings==0.5.1 # -r requirements/base.txt # bleach # html5lib -werkzeug==2.1.2 +werkzeug==3.1.8 # via -r requirements/local.in -wheel==0.45.1 +wheel==0.46.2 # via pip-tools wrapt==1.17.3 # via @@ -841,8 +873,10 @@ yarl==1.23.0 # via # -r requirements/base.txt # aiohttp -zipp==3.11.0 - # via importlib-metadata +zipp==3.19.1 + # via + # -r requirements/base.txt + # importlib-metadata # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements/production.txt b/requirements/production.txt index 892e4334..0418c215 100644 --- a/requirements/production.txt +++ b/requirements/production.txt @@ -6,9 +6,13 @@ # --no-binary psycopg2 -aioboto3==9.6.0 +aioboto3==15.5.0 # via -r requirements/base.txt -aiobotocore[boto3]==2.3.0 +aiobotocore[boto3]==2.25.1 + # via + # -r requirements/base.txt + # aioboto3 +aiofiles==25.1.0 # via # -r requirements/base.txt # aioboto3 @@ -16,7 +20,7 @@ aiohappyeyeballs==2.6.1 # via # -r requirements/base.txt # aiohttp -aiohttp==3.10.5 +aiohttp==3.13.5 # via # -r requirements/base.txt # aiobotocore @@ -25,7 +29,7 @@ aioitertools==0.10.0 # via # -r requirements/base.txt # aiobotocore -aiosignal==1.2.0 +aiosignal==1.4.0 # via # -r requirements/base.txt # aiohttp @@ -63,12 +67,12 @@ billiard==4.2.1 # celery bleach==6.0.0 # via -r requirements/base.txt -boto3==1.21.21 +boto3==1.40.61 # via # -r requirements/base.txt # aiobotocore # django-storages -botocore==1.24.21 +botocore==1.40.61 # via # -r requirements/base.txt # aiobotocore @@ -79,24 +83,21 @@ celery==5.4.0 # -r requirements/base.txt # django-celery-email # squarelet-auth -certifi==2019.6.16 +certifi==2026.2.25 # via # -r requirements/base.txt # requests + # 
scout-apm # sentry-sdk - # urllib3 cffi==2.0.0 # via # -r requirements/base.txt # argon2-cffi-bindings # cryptography -chardet==3.0.4 - # via - # -r requirements/base.txt - # pdfminer-six charset-normalizer==2.1.0 # via # -r requirements/base.txt + # pdfminer-six # requests click==8.1.7 # via @@ -121,14 +122,12 @@ collectfast==2.2.0 # via -r requirements/production.in cpuprofile==1.0.1 # via -r requirements/base.txt -cryptography==46.0.0 +cryptography==46.0.7 # via # -r requirements/base.txt - # django-anymail # google-auth - # pyopenssl + # pdfminer-six # social-auth-core - # urllib3 cssselect==1.1.0 # via # -r requirements/base.txt @@ -153,7 +152,7 @@ deprecated==1.3.1 # via # -r requirements/base.txt # pikepdf -django==5.2.12 +django==5.2.13 # via # -r requirements/base.txt # collectfast @@ -179,7 +178,7 @@ django==5.2.12 # rest-social-auth # social-auth-app-django # squarelet-auth -django-anymail[mailgun]==10.0 +django-anymail[mailgun]==14.0 # via -r requirements/production.in django-appconf==1.0.3 # via @@ -222,7 +221,7 @@ django-redis==5.0.0 # python-redis-lock django-robots==6.1 # via -r requirements/base.txt -django-storages[boto3]==1.12.3 +django-storages[boto3]==1.14.6 # via # -r requirements/production.in # collectfast @@ -255,21 +254,22 @@ frozenlist==1.8.0 # -r requirements/base.txt # aiohttp # aiosignal -fsspec==2025.10.0 +fsspec==2026.3.0 # via # -r requirements/base.txt # gcsfs furl==2.1.0 # via -r requirements/base.txt -gcsfs==2025.10.0 +gcsfs==2026.3.0 # via -r requirements/base.txt -google-api-core[grpc]==2.17.1 +google-api-core[grpc]==2.29.0 # via # -r requirements/base.txt # google-cloud-core # google-cloud-language # google-cloud-pubsub # google-cloud-storage + # google-cloud-storage-control google-auth==2.49.1 # via # -r requirements/base.txt @@ -278,7 +278,9 @@ google-auth==2.49.1 # google-auth-oauthlib # google-cloud-core # google-cloud-language + # google-cloud-pubsub # google-cloud-storage + # google-cloud-storage-control 
google-auth-oauthlib==1.3.1 # via # -r requirements/base.txt @@ -289,9 +291,13 @@ google-cloud-core==2.5.1 # google-cloud-storage google-cloud-language==2.19.0 # via -r requirements/base.txt -google-cloud-pubsub==2.10.0 +google-cloud-pubsub==2.36.0 # via -r requirements/base.txt -google-cloud-storage==3.4.1 +google-cloud-storage==3.10.1 + # via + # -r requirements/base.txt + # gcsfs +google-cloud-storage-control==1.6.1 # via # -r requirements/base.txt # gcsfs @@ -310,7 +316,7 @@ googleapis-common-protos[grpc]==1.73.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -grpc-google-iam-v1==0.12.7 +grpc-google-iam-v1==0.14.4 # via # -r requirements/base.txt # google-cloud-pubsub @@ -332,17 +338,21 @@ gunicorn==20.1.0 # via -r requirements/production.in html2text==2020.1.16 # via -r requirements/base.txt -idna==2.8 +idna==3.7 # via # -r requirements/base.txt + # django-anymail # requests - # urllib3 # yarl +importlib-metadata==8.4.0 + # via + # -r requirements/base.txt + # opentelemetry-api inflection==0.5.1 # via # -r requirements/base.txt # drf-spectacular -ipython==8.5.0 +ipython==8.10.0 # via -r requirements/base.txt jedi==0.18.1 # via @@ -351,6 +361,7 @@ jedi==0.18.1 jmespath==0.9.4 # via # -r requirements/base.txt + # aiobotocore # boto3 # botocore jsonschema==3.2.0 @@ -363,7 +374,7 @@ kombu==5.4.2 # celery listcrunch==1.0.0 # via -r requirements/base.txt -logzio-python-handler==4.1.1 +logzio-python-handler==4.1.9 # via -r requirements/base.txt luqum==0.8.1 # via -r requirements/base.txt @@ -372,15 +383,16 @@ lxml==6.0.2 # -r requirements/base.txt # pikepdf # premailer -markdown==3.4.1 +markdown==3.8.1 # via -r requirements/base.txt matplotlib-inline==0.1.3 # via # -r requirements/base.txt # ipython -multidict==4.7.5 +multidict==6.7.1 # via # -r requirements/base.txt + # aiobotocore # aiohttp # yarl numpy==2.2.6 @@ -390,6 +402,20 @@ oauthlib==3.1.0 # -r requirements/base.txt # requests-oauthlib # social-auth-core +opentelemetry-api==1.40.0 + # via + # -r 
requirements/base.txt + # google-cloud-pubsub + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-sdk==1.40.0 + # via + # -r requirements/base.txt + # google-cloud-pubsub +opentelemetry-semantic-conventions==0.61b0 + # via + # -r requirements/base.txt + # opentelemetry-sdk orderedmultidict==1.0.1 # via # -r requirements/base.txt @@ -402,11 +428,11 @@ parso==0.8.3 # via # -r requirements/base.txt # jedi -pdfminer-six==20200517 +pdfminer-six==20251230 # via # -r requirements/base.txt # pdfplumber -pdfplumber==0.5.28 +pdfplumber==0.11.9 # via -r requirements/base.txt pebble==4.5.0 # via -r requirements/base.txt @@ -441,17 +467,22 @@ prompt-toolkit==3.0.38 propcache==0.4.1 # via # -r requirements/base.txt + # aiohttp # yarl proto-plus==1.27.1 # via # -r requirements/base.txt + # google-api-core # google-cloud-language # google-cloud-pubsub -protobuf==4.25.2 + # google-cloud-storage-control +protobuf==5.29.6 # via # -r requirements/base.txt # google-api-core # google-cloud-language + # google-cloud-pubsub + # google-cloud-storage-control # googleapis-common-protos # grpc-google-iam-v1 # grpcio-status @@ -481,10 +512,6 @@ pycparser==2.19 # via # -r requirements/base.txt # cffi -pycryptodome==3.10.1 - # via - # -r requirements/base.txt - # pdfminer-six pygments==2.20.0 # via # -r requirements/base.txt @@ -496,8 +523,10 @@ pyjwt==2.12.1 # social-auth-core pymupdf==1.25.3 # via -r requirements/base.txt -pyopenssl==19.1.0 - # via urllib3 +pypdfium2==5.7.0 + # via + # -r requirements/base.txt + # pdfplumber pyrsistent==0.18.0 # via # -r requirements/base.txt @@ -507,6 +536,7 @@ pysolr==3.11.0 python-dateutil==2.9.0.post0 # via # -r requirements/base.txt + # aiobotocore # botocore # celery python-redis-lock[django]==3.3.1 @@ -519,7 +549,7 @@ python3-openid==3.1.0 # social-auth-core pytz==2022.1 # via -r requirements/base.txt -pyyaml==5.3.1 +pyyaml==6.0.3 # via # -r requirements/base.txt # drf-spectacular @@ -532,7 +562,7 @@ redis==3.4.1 # -r 
requirements/base.txt # django-redis # python-redis-lock -requests==2.32.5 +requests==2.33.0 # via # -r requirements/base.txt # django-anymail @@ -558,13 +588,13 @@ rjsmin==1.2.5 # django-compressor rules==2.1 # via -r requirements/base.txt -s3transfer==0.5.2 +s3transfer==0.14.0 # via # -r requirements/base.txt # boto3 -scout-apm==2.17.0 +scout-apm==3.5.3 # via -r requirements/production.in -sentry-sdk==1.0.0 +sentry-sdk==2.57.0 # via -r requirements/production.in six==1.17.0 # via @@ -575,7 +605,6 @@ six==1.17.0 # furl # jsonschema # orderedmultidict - # pyopenssl # python-dateutil smart-open==7.5.1 # via -r requirements/base.txt @@ -589,11 +618,7 @@ social-auth-core[openidconnect]==4.7.0 # rest-social-auth # social-auth-app-django # squarelet-auth -sortedcontainers==2.4.0 - # via - # -r requirements/base.txt - # pdfminer-six -sqlparse==0.4.4 +sqlparse==0.5.4 # via # -r requirements/base.txt # django @@ -616,8 +641,12 @@ traitlets==5.3.0 typing-extensions==4.15.0 # via # -r requirements/base.txt + # aiosignal # collectfast # grpcio + # opentelemetry-api + # opentelemetry-sdk + # opentelemetry-semantic-conventions tzdata==2024.2 # via # -r requirements/base.txt @@ -629,7 +658,7 @@ uritemplate==4.1.1 # via # -r requirements/base.txt # drf-spectacular -urllib3[secure]==1.26.10 +urllib3==2.6.3 # via # -r requirements/base.txt # botocore @@ -643,10 +672,6 @@ vine==5.1.0 # amqp # celery # kombu -wand==0.6.6 - # via - # -r requirements/base.txt - # pdfplumber wcwidth==0.1.8 # via # -r requirements/base.txt @@ -666,6 +691,10 @@ yarl==1.23.0 # via # -r requirements/base.txt # aiohttp +zipp==3.19.1 + # via + # -r requirements/base.txt + # importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools From e30b094691684e739a55cb42764f82ec033b9170 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 14 Apr 2026 13:27:06 -0500 Subject: [PATCH 03/40] Add back opencv --- 
documentcloud/conftest.py | 5 +++-- requirements/base.in | 1 + requirements/base.txt | 6 +++++- requirements/local.in | 2 +- requirements/local.txt | 11 +++++++---- requirements/production.txt | 6 +++++- rootCA.pem | 26 ++++++++++++++++++++++++++ 7 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 rootCA.pem diff --git a/documentcloud/conftest.py b/documentcloud/conftest.py index b2c7962f..e3b31fa6 100644 --- a/documentcloud/conftest.py +++ b/documentcloud/conftest.py @@ -24,12 +24,13 @@ from documentcloud.users.tests.factories import UserFactory -def pytest_ignore_collect(path, config): +def pytest_ignore_collect(collection_path, config): """Do not recurse into symlinks when collecting tests Used to ignore symlinks we have in processing to the common module """ # pylint: disable=unused-argument - return path.isdir() and path.islink() + return collection_path.is_dir() and collection_path.is_symlink() + @pytest.fixture diff --git a/requirements/base.in b/requirements/base.in index 7f3f379f..ce70f316 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -61,6 +61,7 @@ wrapt>=1.14.0,<2 cpuprofile listcrunch numpy +opencv-python-headless # Typing typing-extensions diff --git a/requirements/base.txt b/requirements/base.txt index 65fbdd12..f599d526 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -286,11 +286,15 @@ multidict==6.7.1 # aiohttp # yarl numpy==2.2.6 - # via -r requirements/base.in + # via + # -r requirements/base.in + # opencv-python-headless oauthlib==3.1.0 # via # requests-oauthlib # social-auth-core +opencv-python-headless==4.13.0.92 + # via -r requirements/base.in opentelemetry-api==1.40.0 # via # google-cloud-pubsub diff --git a/requirements/local.in b/requirements/local.in index c6daa9b8..28f47dbb 100644 --- a/requirements/local.in +++ b/requirements/local.in @@ -10,7 +10,7 @@ pip-audit # Testing # ------------------------------------------------------------------------------ pytest -pytest-sugar 
+pytest-sugar>=1 pytest-mock -e git+https://github.com/elritsch/python-sharedmock#egg=sharedmock pytest-watch diff --git a/requirements/local.txt b/requirements/local.txt index 6e104b44..fb84ef44 100644 --- a/requirements/local.txt +++ b/requirements/local.txt @@ -461,12 +461,16 @@ multidict==6.7.1 mypy-extensions==0.4.3 # via black numpy==2.2.6 - # via -r requirements/base.txt + # via + # -r requirements/base.txt + # opencv-python-headless oauthlib==3.1.0 # via # -r requirements/base.txt # requests-oauthlib # social-auth-core +opencv-python-headless==4.13.0.92 + # via -r requirements/base.txt opentelemetry-api==1.40.0 # via # -r requirements/base.txt @@ -496,7 +500,6 @@ packaging==24.1 # pip-audit # pip-requirements-parser # pytest - # pytest-sugar # sphinx # wheel parso==0.8.3 @@ -661,7 +664,7 @@ pytest-django==4.5.2 # via -r requirements/local.in pytest-mock==3.8.2 # via -r requirements/local.in -pytest-sugar==0.9.2 +pytest-sugar==1.1.1 # via -r requirements/local.in pytest-watch==4.2.0 # via -r requirements/local.in @@ -797,7 +800,7 @@ stack-data==0.3.0 # via # -r requirements/base.txt # ipython -termcolor==1.1.0 +termcolor==3.3.0 # via pytest-sugar text-unidecode==1.2 # via diff --git a/requirements/production.txt b/requirements/production.txt index 0418c215..7ee08655 100644 --- a/requirements/production.txt +++ b/requirements/production.txt @@ -396,12 +396,16 @@ multidict==6.7.1 # aiohttp # yarl numpy==2.2.6 - # via -r requirements/base.txt + # via + # -r requirements/base.txt + # opencv-python-headless oauthlib==3.1.0 # via # -r requirements/base.txt # requests-oauthlib # social-auth-core +opencv-python-headless==4.13.0.92 + # via -r requirements/base.txt opentelemetry-api==1.40.0 # via # -r requirements/base.txt diff --git a/rootCA.pem b/rootCA.pem new file mode 100644 index 00000000..1c25f618 --- /dev/null +++ b/rootCA.pem @@ -0,0 +1,26 @@ +-----BEGIN CERTIFICATE----- +MIIEaTCCAtGgAwIBAgIQRbE6XHRWgxwPGwd5/bg7lzANBgkqhkiG9w0BAQsFADBN 
+MR4wHAYDVQQKExVta2NlcnQgZGV2ZWxvcG1lbnQgQ0ExETAPBgNVBAsMCHNAcG9w +LW9zMRgwFgYDVQQDDA9ta2NlcnQgc0Bwb3Atb3MwHhcNMjQwODAxMTg0MjAyWhcN +MzQwODAxMTg0MjAyWjBNMR4wHAYDVQQKExVta2NlcnQgZGV2ZWxvcG1lbnQgQ0Ex +ETAPBgNVBAsMCHNAcG9wLW9zMRgwFgYDVQQDDA9ta2NlcnQgc0Bwb3Atb3MwggGi +MA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDP5V3f2znELTU28VdycaXPE0LD +D1oWto2x4HH+eDV/pzhf4szLf8/ay/t4KxZZEz9IFWXCGTv0fPST0aqV9ji68QWr +CKs/QeQvjRXoHH+T1TVGEo4CKwDUu52DfB0orlTSBF4j8GMy4jiMz6vi9ZO9izLp +Ko+1VH7dTXAtEy14Mvlg62TbruxaZIdJIR2tQtcer4/71j7n5EkPMPCi+U8P3no0 +wMPxStDo3A5LlVgA2APevsFww+4M2CCcfsQ9LhAaGsSxIIOZCyBq45HYV2zqGWTq +W6VuM0buKtTzTbJyAPFB9mwIyTgNklrV81jSB2ZxirFeNVN/LC8hQcKyeEPWF32p +aPckCXtGGExMRA355Yu9ygxKQH8P6ncNEboPa+yj1/EmhfOhxkvskAK12y06N3ZH +CLyiatSthZuVI7MvyDbkdp4TEqQUm5FIpCx3CmyuedG4bgTkaCVZkdQxLJEMAqDG +JhMG6fPUTIfK0qX8gUItaLbKeBnNDJVTdbA54ksCAwEAAaNFMEMwDgYDVR0PAQH/ +BAQDAgIEMBIGA1UdEwEB/wQIMAYBAf8CAQAwHQYDVR0OBBYEFIIBM8rKoL4Afa9b +7RgDpAi9VJjDMA0GCSqGSIb3DQEBCwUAA4IBgQBYAjxQzvhVle17grIGgu+m1oeA +Qsv1Pv4pXj4JUs4tNlmyFVcZlPl5rPtbqs4nZhtjD6eRAlil9+W1Gu69iIIIrSnF +c1zpmYNpO36AsqxAbKospID119OWuTVZTMZrgN23D9STonBYGPfR2NI8LGPrNVKT +5B9mpow2teH6ulb7yniEUNgXh+42freeXoGnOw+CCEWkOIJYgYtO+qz7jqY3ZDCd +MsEBrQQuK6WWrKsKrVa9wvIqTRyNXKt1LSbwfER2sWckintpMvLasmKzzdJJ+x9w +c21prHtmryK1Kyz4OXPlQZ6RjmitV/X8JiCDXNcOAl8CKSlZNwhd2XZBhmoqe8Cm +dqHqTTimMqkNmNftAQuWaXSZnJ+fbMLfRKMHk6cwuYkPPI1tVcSHhjGzH7TjGl6r +J9C95gZxmNRbvhaGA9udubVZcxkiethqXorHJTprsav4e1uI6qirD53rMNRuY5GI +mNV0wpl7NSHnQkS1pMsTQgaQITO2w3bpfub0iws= +-----END CERTIFICATE----- From 44b6655a6f4468ade2b661ad3e33130b708a0927 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 14 Apr 2026 13:28:25 -0500 Subject: [PATCH 04/40] Remove rootCA.pem --- rootCA.pem | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 rootCA.pem diff --git a/rootCA.pem b/rootCA.pem deleted file mode 100644 index 1c25f618..00000000 --- a/rootCA.pem +++ /dev/null @@ -1,26 +0,0 @@ 
------BEGIN CERTIFICATE----- -MIIEaTCCAtGgAwIBAgIQRbE6XHRWgxwPGwd5/bg7lzANBgkqhkiG9w0BAQsFADBN -MR4wHAYDVQQKExVta2NlcnQgZGV2ZWxvcG1lbnQgQ0ExETAPBgNVBAsMCHNAcG9w -LW9zMRgwFgYDVQQDDA9ta2NlcnQgc0Bwb3Atb3MwHhcNMjQwODAxMTg0MjAyWhcN -MzQwODAxMTg0MjAyWjBNMR4wHAYDVQQKExVta2NlcnQgZGV2ZWxvcG1lbnQgQ0Ex -ETAPBgNVBAsMCHNAcG9wLW9zMRgwFgYDVQQDDA9ta2NlcnQgc0Bwb3Atb3MwggGi -MA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDP5V3f2znELTU28VdycaXPE0LD -D1oWto2x4HH+eDV/pzhf4szLf8/ay/t4KxZZEz9IFWXCGTv0fPST0aqV9ji68QWr -CKs/QeQvjRXoHH+T1TVGEo4CKwDUu52DfB0orlTSBF4j8GMy4jiMz6vi9ZO9izLp -Ko+1VH7dTXAtEy14Mvlg62TbruxaZIdJIR2tQtcer4/71j7n5EkPMPCi+U8P3no0 -wMPxStDo3A5LlVgA2APevsFww+4M2CCcfsQ9LhAaGsSxIIOZCyBq45HYV2zqGWTq -W6VuM0buKtTzTbJyAPFB9mwIyTgNklrV81jSB2ZxirFeNVN/LC8hQcKyeEPWF32p -aPckCXtGGExMRA355Yu9ygxKQH8P6ncNEboPa+yj1/EmhfOhxkvskAK12y06N3ZH -CLyiatSthZuVI7MvyDbkdp4TEqQUm5FIpCx3CmyuedG4bgTkaCVZkdQxLJEMAqDG -JhMG6fPUTIfK0qX8gUItaLbKeBnNDJVTdbA54ksCAwEAAaNFMEMwDgYDVR0PAQH/ -BAQDAgIEMBIGA1UdEwEB/wQIMAYBAf8CAQAwHQYDVR0OBBYEFIIBM8rKoL4Afa9b -7RgDpAi9VJjDMA0GCSqGSIb3DQEBCwUAA4IBgQBYAjxQzvhVle17grIGgu+m1oeA -Qsv1Pv4pXj4JUs4tNlmyFVcZlPl5rPtbqs4nZhtjD6eRAlil9+W1Gu69iIIIrSnF -c1zpmYNpO36AsqxAbKospID119OWuTVZTMZrgN23D9STonBYGPfR2NI8LGPrNVKT -5B9mpow2teH6ulb7yniEUNgXh+42freeXoGnOw+CCEWkOIJYgYtO+qz7jqY3ZDCd -MsEBrQQuK6WWrKsKrVa9wvIqTRyNXKt1LSbwfER2sWckintpMvLasmKzzdJJ+x9w -c21prHtmryK1Kyz4OXPlQZ6RjmitV/X8JiCDXNcOAl8CKSlZNwhd2XZBhmoqe8Cm -dqHqTTimMqkNmNftAQuWaXSZnJ+fbMLfRKMHk6cwuYkPPI1tVcSHhjGzH7TjGl6r -J9C95gZxmNRbvhaGA9udubVZcxkiethqXorHJTprsav4e1uI6qirD53rMNRuY5GI -mNV0wpl7NSHnQkS1pMsTQgaQITO2w3bpfub0iws= ------END CERTIFICATE----- From bb5c7c6478420290851ef7eceead386b5f52c89d Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 14 Apr 2026 17:37:04 -0500 Subject: [PATCH 05/40] Get tests working --- .gitignore | 1 + documentcloud/addons/tests/test_views.py | 4 +- documentcloud/documents/models/note.py | 2 +- documentcloud/documents/serializers.py | 
2 +- documentcloud/documents/tests/test_search.py | 1 + documentcloud/documents/views.py | 2 +- ...io_alter_user_email_alter_user_username.py | 475 ++++++++++++++++++ .../migrations/0008_user_active_addons.py | 19 - .../users/migrations/0009_user_mailkey.py | 2 +- .../migrations/0013_user_active_addons.py | 21 + .../migrations/0014_merge_20260414_1910.py | 13 + 11 files changed, 517 insertions(+), 25 deletions(-) create mode 100644 documentcloud/users/migrations/0001_initial_squashed_0010_user_bio_alter_user_email_alter_user_username.py delete mode 100644 documentcloud/users/migrations/0008_user_active_addons.py create mode 100644 documentcloud/users/migrations/0013_user_active_addons.py create mode 100644 documentcloud/users/migrations/0014_merge_20260414_1910.py diff --git a/.gitignore b/.gitignore index b1d474b4..83882349 100644 --- a/.gitignore +++ b/.gitignore @@ -306,3 +306,4 @@ CLAUDE.md .claude rootCA.pem +tests.txt diff --git a/documentcloud/addons/tests/test_views.py b/documentcloud/addons/tests/test_views.py index cb5c98b7..bf9f86d3 100644 --- a/documentcloud/addons/tests/test_views.py +++ b/documentcloud/addons/tests/test_views.py @@ -262,9 +262,9 @@ def test_update_no_addon(self, client): def test_destroy(self, client, mocker): """Destroying an addon run cancels it""" - cancel = mocker.patch.object(AddOnRun, "cancel") + cancel = mocker.patch("documentcloud.addons.views.cancel.delay") run = AddOnRunFactory() client.force_authenticate(user=run.user) response = client.delete(f"/api/addon_runs/{run.uuid}/") assert response.status_code == status.HTTP_204_NO_CONTENT - assert cancel.called_once() + cancel.assert_called_once() diff --git a/documentcloud/documents/models/note.py b/documentcloud/documents/models/note.py index c05e3d8c..a5d925d4 100644 --- a/documentcloud/documents/models/note.py +++ b/documentcloud/documents/models/note.py @@ -165,7 +165,7 @@ def solr(self): "type": "note", "user": self.user_id, "organization": self.organization_id, - "access": 
Access.attributes[self.access], + "access": Access(self.access).name, "page_count": self.page_number, "title": self.title, "description": self.content, diff --git a/documentcloud/documents/serializers.py b/documentcloud/documents/serializers.py index bd936e6f..4167a214 100644 --- a/documentcloud/documents/serializers.py +++ b/documentcloud/documents/serializers.py @@ -850,7 +850,7 @@ class EntityOccurrenceSerializer(serializers.ModelSerializer): def get_occurrences(self, obj): def fix(entity): value = entity.pop("kind", 0) - entity["kind"] = OccurrenceKind.attributes.get(value, value) + entity["kind"] = OccurrenceKind(value).name if value in OccurrenceKind.values else value return entity return [fix(e) for e in obj.occurrences] diff --git a/documentcloud/documents/tests/test_search.py b/documentcloud/documents/tests/test_search.py index 376b6446..7c36435e 100644 --- a/documentcloud/documents/tests/test_search.py +++ b/documentcloud/documents/tests/test_search.py @@ -106,6 +106,7 @@ def setup_solr(django_db_setup, django_db_blocker): for note in notes.values(): solr_notes.add([note.solr()]) solr.commit() + solr_notes.commit() yield finally: Document.objects.all().delete() diff --git a/documentcloud/documents/views.py b/documentcloud/documents/views.py index bc016f32..dd8c9254 100644 --- a/documentcloud/documents/views.py +++ b/documentcloud/documents/views.py @@ -116,7 +116,7 @@ class DocumentViewSet(BulkModelMixin, FlexFieldsModelViewSet): serializer_class = DocumentSerializer queryset = Document.objects.none() permission_classes = ( - IsAuthenticated| DocumentTokenPermissions, + DjangoObjectPermissionsOrAnonReadOnly | DocumentTokenPermissions, ) @extend_schema( diff --git a/documentcloud/users/migrations/0001_initial_squashed_0010_user_bio_alter_user_email_alter_user_username.py b/documentcloud/users/migrations/0001_initial_squashed_0010_user_bio_alter_user_email_alter_user_username.py new file mode 100644 index 00000000..64f3bc5e --- /dev/null +++ 
b/documentcloud/users/migrations/0001_initial_squashed_0010_user_bio_alter_user_email_alter_user_username.py @@ -0,0 +1,475 @@ +# Generated by Django 5.2.13 on 2026-04-14 18:44 + +import django.contrib.postgres.fields.citext +import django.contrib.postgres.operations +import django.utils.timezone +import documentcloud.core.fields +import documentcloud.users.managers +import squarelet_auth.fields +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + replaces = [ + ("users", "0001_initial"), + ("users", "0002_auto_20200128_1418"), + ("users", "0003_auto_20200214_1640"), + ("users", "0004_auto_20200306_2000"), + ("users", "0005_auto_20200523_1534"), + ("users", "0006_auto_20200925_2001"), + ("users", "0007_auto_20211102_1707"), + ("users", "0009_user_mailkey"), + ("users", "0010_user_bio_alter_user_email_alter_user_username"), + ] + + dependencies = [ + ("auth", "0011_update_proxy_permissions"), + ] + + operations = [ + django.contrib.postgres.operations.CITextExtension(), + django.contrib.postgres.operations.CreateCollation( + name="case_insensitive", + locale="und-u-ks-level2", + provider="icu", + deterministic=False, + ), + migrations.CreateModel( + name="User", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("password", models.CharField(max_length=128, verbose_name="password")), + ( + "last_login", + models.DateTimeField( + blank=True, null=True, verbose_name="last login" + ), + ), + ( + "is_superuser", + models.BooleanField( + default=False, + help_text="Designates that this user has all permissions without explicitly assigning them.", + verbose_name="superuser status", + ), + ), + ( + "uuid", + models.UUIDField( + db_index=True, + default=uuid.uuid4, + editable=False, + help_text="Unique ID to link users across MuckRock's sites", + unique=True, + verbose_name="UUID", + ), + ), + ( + "name", + models.CharField( + help_text="The 
user's full name", + max_length=255, + verbose_name="full name", + ), + ), + ( + "email", + models.EmailField( + db_collation="case_insensitive", + help_text="The user's primary email address", + max_length=254, + null=True, + unique=True, + verbose_name="email", + ), + ), + ( + "username", + models.CharField( + db_collation="case_insensitive", + help_text="A unique public identifier for the user", + max_length=150, + unique=True, + verbose_name="username", + ), + ), + ( + "avatar_url", + models.URLField( + blank=True, + help_text="A URL which points to an avatar for the user", + max_length=255, + verbose_name="avatar url", + ), + ), + ( + "is_staff", + models.BooleanField( + default=False, + help_text="Designates whether the user can log into this admin site.", + verbose_name="staff status", + ), + ), + ( + "is_active", + models.BooleanField( + default=True, + help_text="Designates whether this user should be treated as active. Unselect this instead of deleting accounts.", + verbose_name="active", + ), + ), + ( + "email_failed", + models.BooleanField( + default=False, + help_text="Has an email we sent to this user's email address failed?", + verbose_name="email failed", + ), + ), + ( + "email_verified", + models.BooleanField( + default=False, + help_text="Has this user's email address been verified?", + verbose_name="email verified", + ), + ), + ( + "created_at", + documentcloud.core.fields.AutoCreatedField( + default=django.utils.timezone.now, + editable=False, + help_text="Timestamp of when the user was created", + verbose_name="created at", + ), + ), + ( + "updated_at", + documentcloud.core.fields.AutoLastModifiedField( + default=django.utils.timezone.now, + editable=False, + help_text="Timestamp of when the user was last updated", + verbose_name="updated at", + ), + ), + ( + "use_autologin", + models.BooleanField( + default=True, + help_text="Links you receive in emails from us will contain a token to automatically log you in", + verbose_name="use autologin", 
+ ), + ), + ( + "groups", + models.ManyToManyField( + blank=True, + help_text="The groups this user belongs to. A user will get all permissions granted to each of their groups.", + related_name="user_set", + related_query_name="user", + to="auth.group", + verbose_name="groups", + ), + ), + ( + "user_permissions", + models.ManyToManyField( + blank=True, + help_text="Specific permissions for this user.", + related_name="user_set", + related_query_name="user", + to="auth.permission", + verbose_name="user permissions", + ), + ), + ], + options={ + "abstract": False, + "ordering": ("username",), + }, + managers=[ + ("objects", documentcloud.users.managers.UserManager()), + ], + ), + migrations.RunSQL( + sql="ALTER SEQUENCE users_user_id_seq RESTART WITH 100000", + ), + migrations.AlterField( + model_name="user", + name="created_at", + field=squarelet_auth.fields.AutoCreatedField( + default=django.utils.timezone.now, + editable=False, + help_text="Timestamp of when the user was created", + verbose_name="created at", + ), + ), + migrations.AlterField( + model_name="user", + name="updated_at", + field=squarelet_auth.fields.AutoLastModifiedField( + default=django.utils.timezone.now, + editable=False, + help_text="Timestamp of when the user was last updated", + verbose_name="updated at", + ), + ), + migrations.AddField( + model_name="user", + name="document_language", + field=models.CharField( + blank=True, + choices=[ + ("afr", "Afrikaans"), + ("amh", "Amharic"), + ("ara", "Arabic"), + ("asm", "Assamese"), + ("aze", "Azerbaijani"), + ("aze_cyrl", "Azerbaijani - Cyrillic"), + ("bel", "Belarusian"), + ("ben", "Bengali"), + ("bod", "Tibetan"), + ("bos", "Bosnian"), + ("bul", "Bulgarian"), + ("cat", "Catalan; Valencian"), + ("ceb", "Cebuano"), + ("ces", "Czech"), + ("zho", "Chinese - Simplified"), + ("tra", "Chinese - Traditional"), + ("chr", "Cherokee"), + ("cym", "Welsh"), + ("dan", "Danish"), + ("deu", "German"), + ("dzo", "Dzongkha"), + ("ell", "Greek"), + ("eng", 
"English"), + ("enm", "Middle English"), + ("epo", "Esperanto"), + ("est", "Estonian"), + ("eus", "Basque"), + ("fas", "Persian"), + ("fin", "Finnish"), + ("fra", "French"), + ("frk", "German Fraktur"), + ("frm", "Middle French"), + ("gle", "Irish"), + ("glg", "Galician"), + ("grc", "Ancient Greek"), + ("guj", "Gujarati"), + ("hat", "Haitian; Haitian Creole"), + ("heb", "Hebrew"), + ("hin", "Hindi"), + ("hrv", "Croatian"), + ("hun", "Hungarian"), + ("iku", "Inuktitut"), + ("ind", "Indonesian"), + ("isl", "Icelandic"), + ("ita", "Italian"), + ("ita_old", "Italian - Old"), + ("jav", "Javanese"), + ("jpn", "Japanese"), + ("kan", "Kannada"), + ("kat", "Georgian"), + ("kat_old", "Georgian - Old"), + ("kaz", "Kazakh"), + ("khm", "Central Khmer"), + ("kir", "Kirghiz; Kyrgyz"), + ("kor", "Korean"), + ("kur", "Kurdish"), + ("lao", "Lao"), + ("lat", "Latin"), + ("lav", "Latvian"), + ("lit", "Lithuanian"), + ("mal", "Malayalam"), + ("mar", "Marathi"), + ("mkd", "Macedonian"), + ("mlt", "Maltese"), + ("msa", "Malay"), + ("mya", "Burmese"), + ("nep", "Nepali"), + ("nld", "Dutch; Flemish"), + ("nor", "Norwegian"), + ("ori", "Oriya"), + ("pan", "Panjabi; Punjabi"), + ("pol", "Polish"), + ("por", "Portuguese"), + ("pus", "Pushto; Pashto"), + ("ron", "Romanian; Moldavian; Moldovan"), + ("rus", "Russian"), + ("san", "Sanskrit"), + ("sin", "Sinhala; Sinhalese"), + ("slk", "Slovak"), + ("slv", "Slovenian"), + ("spa", "Spanish; Castilian"), + ("spa_old", "Spanish; Castilian - Old"), + ("sqi", "Albanian"), + ("srp", "Serbian"), + ("srp_latn", "Serbian - Latin"), + ("swa", "Swahili"), + ("swe", "Swedish"), + ("syr", "Syriac"), + ("tam", "Tamil"), + ("tel", "Telugu"), + ("tgk", "Tajik"), + ("tgl", "Tagalog"), + ("tha", "Thai"), + ("tir", "Tigrinya"), + ("tur", "Turkish"), + ("uig", "Uighur; Uyghur"), + ("ukr", "Ukrainian"), + ("urd", "Urdu"), + ("uzb", "Uzbek"), + ("uzb_cyrl", "Uzbek - Cyrillic"), + ("vie", "Vietnamese"), + ("yid", "Yiddish"), + ], + default="eng", + help_text="The 
default language for documents uploaded by this user", + max_length=8, + verbose_name="document language", + ), + ), + migrations.AddField( + model_name="user", + name="language", + field=models.CharField( + blank=True, + choices=[ + ("afr", "Afrikaans"), + ("amh", "Amharic"), + ("ara", "Arabic"), + ("asm", "Assamese"), + ("aze", "Azerbaijani"), + ("aze_cyrl", "Azerbaijani - Cyrillic"), + ("bel", "Belarusian"), + ("ben", "Bengali"), + ("bod", "Tibetan"), + ("bos", "Bosnian"), + ("bul", "Bulgarian"), + ("cat", "Catalan; Valencian"), + ("ceb", "Cebuano"), + ("ces", "Czech"), + ("zho", "Chinese - Simplified"), + ("tra", "Chinese - Traditional"), + ("chr", "Cherokee"), + ("cym", "Welsh"), + ("dan", "Danish"), + ("deu", "German"), + ("dzo", "Dzongkha"), + ("ell", "Greek"), + ("eng", "English"), + ("enm", "Middle English"), + ("epo", "Esperanto"), + ("est", "Estonian"), + ("eus", "Basque"), + ("fas", "Persian"), + ("fin", "Finnish"), + ("fra", "French"), + ("frk", "German Fraktur"), + ("frm", "Middle French"), + ("gle", "Irish"), + ("glg", "Galician"), + ("grc", "Ancient Greek"), + ("guj", "Gujarati"), + ("hat", "Haitian; Haitian Creole"), + ("heb", "Hebrew"), + ("hin", "Hindi"), + ("hrv", "Croatian"), + ("hun", "Hungarian"), + ("iku", "Inuktitut"), + ("ind", "Indonesian"), + ("isl", "Icelandic"), + ("ita", "Italian"), + ("ita_old", "Italian - Old"), + ("jav", "Javanese"), + ("jpn", "Japanese"), + ("kan", "Kannada"), + ("kat", "Georgian"), + ("kat_old", "Georgian - Old"), + ("kaz", "Kazakh"), + ("khm", "Central Khmer"), + ("kir", "Kirghiz; Kyrgyz"), + ("kor", "Korean"), + ("kur", "Kurdish"), + ("lao", "Lao"), + ("lat", "Latin"), + ("lav", "Latvian"), + ("lit", "Lithuanian"), + ("mal", "Malayalam"), + ("mar", "Marathi"), + ("mkd", "Macedonian"), + ("mlt", "Maltese"), + ("msa", "Malay"), + ("mya", "Burmese"), + ("nep", "Nepali"), + ("nld", "Dutch; Flemish"), + ("nor", "Norwegian"), + ("ori", "Oriya"), + ("pan", "Panjabi; Punjabi"), + ("pol", "Polish"), + ("por", 
"Portuguese"), + ("pus", "Pushto; Pashto"), + ("ron", "Romanian; Moldavian; Moldovan"), + ("rus", "Russian"), + ("san", "Sanskrit"), + ("sin", "Sinhala; Sinhalese"), + ("slk", "Slovak"), + ("slv", "Slovenian"), + ("spa", "Spanish; Castilian"), + ("spa_old", "Spanish; Castilian - Old"), + ("sqi", "Albanian"), + ("srp", "Serbian"), + ("srp_latn", "Serbian - Latin"), + ("swa", "Swahili"), + ("swe", "Swedish"), + ("syr", "Syriac"), + ("tam", "Tamil"), + ("tel", "Telugu"), + ("tgk", "Tajik"), + ("tgl", "Tagalog"), + ("tha", "Thai"), + ("tir", "Tigrinya"), + ("tur", "Turkish"), + ("uig", "Uighur; Uyghur"), + ("ukr", "Ukrainian"), + ("urd", "Urdu"), + ("uzb", "Uzbek"), + ("uzb_cyrl", "Uzbek - Cyrillic"), + ("vie", "Vietnamese"), + ("yid", "Yiddish"), + ], + default="eng", + help_text="The interface language for this user", + max_length=8, + verbose_name="language", + ), + ), + migrations.AddField( + model_name="user", + name="mailkey", + field=models.UUIDField( + help_text="Mail key for uploading documents via email", + null=True, + verbose_name="mailkey", + ), + ), + migrations.AddField( + model_name="user", + name="bio", + field=models.TextField( + blank=True, + help_text="Public bio for the user, in Markdown", + verbose_name="bio", + ), + ), + ] \ No newline at end of file diff --git a/documentcloud/users/migrations/0008_user_active_addons.py b/documentcloud/users/migrations/0008_user_active_addons.py deleted file mode 100644 index bcdc6bb5..00000000 --- a/documentcloud/users/migrations/0008_user_active_addons.py +++ /dev/null @@ -1,19 +0,0 @@ -# Generated by Django 3.2.9 on 2022-03-30 19:14 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('addons', '0005_auto_20220330_1908'), - ('users', '0007_auto_20211102_1707'), - ] - - operations = [ - migrations.AddField( - model_name='user', - name='active_addons', - field=models.ManyToManyField(help_text='Add-Ons shown for this user', related_name='users', 
to='addons.AddOn', verbose_name='active add-ons'), - ), - ] diff --git a/documentcloud/users/migrations/0009_user_mailkey.py b/documentcloud/users/migrations/0009_user_mailkey.py index bbdacf07..4ff5c95b 100644 --- a/documentcloud/users/migrations/0009_user_mailkey.py +++ b/documentcloud/users/migrations/0009_user_mailkey.py @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ('users', '0008_user_active_addons'), + ('users', '0007_auto_20211102_1707'), ] operations = [ diff --git a/documentcloud/users/migrations/0013_user_active_addons.py b/documentcloud/users/migrations/0013_user_active_addons.py new file mode 100644 index 00000000..6d2ca256 --- /dev/null +++ b/documentcloud/users/migrations/0013_user_active_addons.py @@ -0,0 +1,21 @@ +from django.db import migrations, models + +class Migration(migrations.Migration): + + dependencies = [ + ("users", "0001_initial_squashed_0010_user_bio_alter_user_email_alter_user_username"), + ("users", "0007_auto_20211102_1707"), + ] + + operations = [ + migrations.AddField( + model_name="user", + name="active_addons", + field=models.ManyToManyField( + help_text="Add-Ons shown for this user", + related_name="users", + to="addons.addon", + verbose_name="active add-ons", + ), + ), + ] \ No newline at end of file diff --git a/documentcloud/users/migrations/0014_merge_20260414_1910.py b/documentcloud/users/migrations/0014_merge_20260414_1910.py new file mode 100644 index 00000000..7fb57b4d --- /dev/null +++ b/documentcloud/users/migrations/0014_merge_20260414_1910.py @@ -0,0 +1,13 @@ +# Generated by Django 5.2.13 on 2026-04-14 19:10 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("users", "0012_default_pinned_projects"), + ("users", "0013_user_active_addons"), + ] + + operations = [] From 94e41650c769c5fd0a844d3059c749fecc46168e Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 14 Apr 2026 
17:45:03 -0500 Subject: [PATCH 06/40] Format --- config/settings/base.py | 2 +- config/urls.py | 10 ++++++---- documentcloud/addons/choices.py | 2 +- .../common/serverless/tests/test_error_handling.py | 2 +- documentcloud/conftest.py | 1 - documentcloud/core/choices.py | 2 +- documentcloud/documents/choices.py | 3 ++- documentcloud/documents/fields.py | 9 +++------ documentcloud/documents/modifications.py | 1 + .../documents/processing/info_and_image/main.py | 7 +++---- .../documents/processing/info_and_image/pdfium.py | 2 +- documentcloud/documents/processing/ocr/main.py | 5 ++--- .../documents/processing/tests/pipeline_tests/mocks.py | 4 +--- .../documents/processing/tests/report_generator.py | 8 ++------ documentcloud/documents/search.py | 2 +- documentcloud/documents/serializers.py | 4 +++- documentcloud/documents/solr.py | 4 ++-- documentcloud/documents/tasks.py | 2 +- documentcloud/documents/tests/test_search.py | 2 +- documentcloud/documents/views.py | 2 +- documentcloud/entities/choices.py | 2 +- documentcloud/entities/tests/factories.py | 1 + documentcloud/projects/choices.py | 2 +- tasks.py | 2 +- 24 files changed, 38 insertions(+), 43 deletions(-) diff --git a/config/settings/base.py b/config/settings/base.py index 4b65621f..f7eb4a0b 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -496,7 +496,7 @@ PROGRESS_URL = env("PROGRESS_URL", default="") IMPORT_URL = env("IMPORT_URL", default="") PROGRESS_TIMEOUT = env.int("PROGRESS_TIMEOUT", default=1) -#SIDEKICK_PROCESSING_URL = env("SIDEKICK_PROCESSING_URL", default="") +# SIDEKICK_PROCESSING_URL = env("SIDEKICK_PROCESSING_URL", default="") # Auth LOGIN_URL = "/accounts/login/squarelet" diff --git a/config/urls.py b/config/urls.py index 78d935ca..18b3df31 100644 --- a/config/urls.py +++ b/config/urls.py @@ -49,7 +49,6 @@ ProjectMembershipViewSet, ProjectViewSet, ) - from documentcloud.statistics.views import StatisticsViewSet from documentcloud.users.views import MessageView, UserViewSet @@ 
-75,8 +74,12 @@ class BulkNestedDefaultRouter(BulkRouterMixin, NestedDefaultRouter): documents_router.register("notes", NoteViewSet) documents_router.register("sections", SectionViewSet) documents_router.register("entities", EntityOccurrenceViewSet) -documents_router.register("legacy_entities_2", LegacyEntity2ViewSet, basename="legacyentity2") -documents_router.register("legacy_entities", LegacyEntityViewSet, basename="legacyentity") +documents_router.register( + "legacy_entities_2", LegacyEntity2ViewSet, basename="legacyentity2" +) +documents_router.register( + "legacy_entities", LegacyEntityViewSet, basename="legacyentity" +) documents_router.register("dates", EntityDateViewSet) documents_router.register("errors", DocumentErrorViewSet) documents_router.register("data", DataViewSet, basename="data") @@ -90,7 +93,6 @@ class BulkNestedDefaultRouter(BulkRouterMixin, NestedDefaultRouter): projects_router.register("users", CollaborationViewSet) - router.register("documents/search/saved", SavedSearchViewSet, basename="saved_search") urlpatterns = [ diff --git a/documentcloud/addons/choices.py b/documentcloud/addons/choices.py index 9130f337..f2a8e1d9 100644 --- a/documentcloud/addons/choices.py +++ b/documentcloud/addons/choices.py @@ -16,4 +16,4 @@ def __new__(cls, value, label=None, api=False): hourly = 1, _("Hourly"), True daily = 2, _("Daily"), True weekly = 3, _("Weekly"), True - upload = 4, _("Upload"), True \ No newline at end of file + upload = 4, _("Upload"), True diff --git a/documentcloud/common/serverless/tests/test_error_handling.py b/documentcloud/common/serverless/tests/test_error_handling.py index 23c0a8eb..db26b324 100644 --- a/documentcloud/common/serverless/tests/test_error_handling.py +++ b/documentcloud/common/serverless/tests/test_error_handling.py @@ -11,7 +11,6 @@ # Third Party import pytest -from sharedmock.mock import SharedMock # DocumentCloud from documentcloud.common import redis_fields @@ -27,6 +26,7 @@ StorageHandler, Workspace, ) +from 
sharedmock.mock import SharedMock # Since redis is used in the SharedMock calls, it needs to be pickle-able # in order to be sent across the process boundary. FakeRedis and Mock's both diff --git a/documentcloud/conftest.py b/documentcloud/conftest.py index e3b31fa6..9a81b0a0 100644 --- a/documentcloud/conftest.py +++ b/documentcloud/conftest.py @@ -32,7 +32,6 @@ def pytest_ignore_collect(collection_path, config): return collection_path.is_dir() and collection_path.is_symlink() - @pytest.fixture def user(): return UserFactory() diff --git a/documentcloud/core/choices.py b/documentcloud/core/choices.py index 680b410a..337e16bc 100644 --- a/documentcloud/core/choices.py +++ b/documentcloud/core/choices.py @@ -113,4 +113,4 @@ def __new__(cls, value, label=None, ocr_code=None): uzbek = "uzb", _("Uzbek") uzbek_cyrillic = "uzb_cyrl", _("Uzbek - Cyrillic") vietnamese = "vie", _("Vietnamese") - yiddish = "yid", _("Yiddish") \ No newline at end of file + yiddish = "yid", _("Yiddish") diff --git a/documentcloud/documents/choices.py b/documentcloud/documents/choices.py index a2931f00..fd0dec5e 100644 --- a/documentcloud/documents/choices.py +++ b/documentcloud/documents/choices.py @@ -2,6 +2,7 @@ from django.db import models from django.utils.translation import gettext_lazy as _ + class Access(models.IntegerChoices): def __new__(cls, value, label=None, api=False): obj = int.__new__(cls, value) @@ -79,4 +80,4 @@ def __new__(cls, value, label=None, api=False): unknown = 0, _("Unknown"), True proper = 1, _("Proper"), True - common = 2, _("Common"), True \ No newline at end of file + common = 2, _("Common"), True diff --git a/documentcloud/documents/fields.py b/documentcloud/documents/fields.py index 125b0426..716a4e08 100644 --- a/documentcloud/documents/fields.py +++ b/documentcloud/documents/fields.py @@ -4,12 +4,9 @@ class ChoiceField(serializers.ChoiceField): """Choice field enhanced to use the choices label and ability to omit choices""" + def __init__(self, choices, 
**kwargs): - choices = [ - (member.value, member.name) - for member in choices - if member.api - ] + choices = [(member.value, member.name) for member in choices if member.api] self.choice_map = {label: value for value, label in choices} super().__init__(choices, **kwargs) @@ -25,4 +22,4 @@ def to_internal_value(self, data): try: return self.choice_map[str(data)] except KeyError: - self.fail("invalid_choice", input=data) \ No newline at end of file + self.fail("invalid_choice", input=data) diff --git a/documentcloud/documents/modifications.py b/documentcloud/documents/modifications.py index 6fa89acb..1682dc58 100644 --- a/documentcloud/documents/modifications.py +++ b/documentcloud/documents/modifications.py @@ -2,6 +2,7 @@ # Django from django.db import transaction +# Standard Library from collections import defaultdict from copy import copy diff --git a/documentcloud/documents/processing/info_and_image/main.py b/documentcloud/documents/processing/info_and_image/main.py index 090e44bc..10cd414a 100755 --- a/documentcloud/documents/processing/info_and_image/main.py +++ b/documentcloud/documents/processing/info_and_image/main.py @@ -36,7 +36,6 @@ # Imports based on execution context if env.str("ENVIRONMENT").startswith("local"): # DocumentCloud - from documentcloud.documents.processing.info_and_image import graft from documentcloud.common import access_choices, path, redis_fields from documentcloud.common.environment import ( encode_pubsub_data, @@ -45,11 +44,12 @@ storage, ) from documentcloud.common.serverless import utils - from documentcloud.common.serverless.utils import REDIS_TTL from documentcloud.common.serverless.error_handling import ( pubsub_function, pubsub_function_import, ) + from documentcloud.common.serverless.utils import REDIS_TTL + from documentcloud.documents.processing.info_and_image import graft from documentcloud.documents.processing.info_and_image.graft_adapter import ( GraftContext, ) @@ -60,7 +60,6 @@ else: # Third Party import graft - # 
only initialize sentry on serverless import sentry_sdk from common import access_choices, path, redis_fields @@ -71,8 +70,8 @@ storage, ) from common.serverless import utils - from common.serverless.utils import REDIS_TTL from common.serverless.error_handling import pubsub_function, pubsub_function_import + from common.serverless.utils import REDIS_TTL from graft_adapter import GraftContext from pdfium import StorageHandler, Workspace from sentry_sdk.integrations.aws_lambda import AwsLambdaIntegration diff --git a/documentcloud/documents/processing/info_and_image/pdfium.py b/documentcloud/documents/processing/info_and_image/pdfium.py index 7a5df3fe..03b9c05e 100755 --- a/documentcloud/documents/processing/info_and_image/pdfium.py +++ b/documentcloud/documents/processing/info_and_image/pdfium.py @@ -374,7 +374,7 @@ def get_media_box(self): def set_desired_transform(self, page_object, x, y, width, height): # Get the bounds of the text object - (left, bottom, right, top) = self.get_bounds(page_object) + left, bottom, right, top = self.get_bounds(page_object) # Transform to origin self.workspace.fpdf_page_obj_transform( diff --git a/documentcloud/documents/processing/ocr/main.py b/documentcloud/documents/processing/ocr/main.py index 2d17faf9..aac04c38 100755 --- a/documentcloud/documents/processing/ocr/main.py +++ b/documentcloud/documents/processing/ocr/main.py @@ -30,9 +30,9 @@ publisher, storage, ) - from documentcloud.common.utils import graft_page from documentcloud.common.serverless import utils from documentcloud.common.serverless.error_handling import pubsub_function + from documentcloud.common.utils import graft_page from documentcloud.documents.processing.ocr.tess import Tesseract else: # Third Party @@ -45,12 +45,11 @@ publisher, storage, ) - from common.utils import graft_page from common.serverless import utils from common.serverless.error_handling import pubsub_function + from common.utils import graft_page from sentry_sdk.integrations.aws_lambda import 
AwsLambdaIntegration from sentry_sdk.integrations.redis import RedisIntegration - from tess import Tesseract sentry_sdk.init( diff --git a/documentcloud/documents/processing/tests/pipeline_tests/mocks.py b/documentcloud/documents/processing/tests/pipeline_tests/mocks.py index 05089cb9..d9d0d3a8 100644 --- a/documentcloud/documents/processing/tests/pipeline_tests/mocks.py +++ b/documentcloud/documents/processing/tests/pipeline_tests/mocks.py @@ -8,10 +8,8 @@ from contextlib import ExitStack from unittest.mock import patch -# Third Party -from config import celery_app - # DocumentCloud +from config import celery_app from documentcloud.common import path from documentcloud.common.serverless.utils import get_redis, initialize from documentcloud.documents.processing.tests.pipeline_tests.fake_pdf import FakePage diff --git a/documentcloud/documents/processing/tests/report_generator.py b/documentcloud/documents/processing/tests/report_generator.py index b60f25f9..4d3f4de5 100755 --- a/documentcloud/documents/processing/tests/report_generator.py +++ b/documentcloud/documents/processing/tests/report_generator.py @@ -35,9 +35,7 @@ def __init__(self, filename): self.anchor_id: int = 0 # Write the header and style sheet. Declare utf8 encoding. - self.html_file.write( - inspect.cleandoc( - """ + self.html_file.write(inspect.cleandoc(""" @@ -50,9 +48,7 @@ def __init__(self, filename): } - """ - ) - ) + """)) def add_heading(self, text): """Add a primary header to the HTML document with a table of contents entry. 
diff --git a/documentcloud/documents/search.py b/documentcloud/documents/search.py index facc8e98..3c84e469 100644 --- a/documentcloud/documents/search.py +++ b/documentcloud/documents/search.py @@ -9,12 +9,12 @@ from datetime import datetime # Third Party -import pysolr from luqum.parser import ParseError, parser from luqum.tree import BaseOperation, Boost, Group, Not, Prohibit, Range, Unary, Word from luqum.utils import LuceneTreeTransformer, LuceneTreeVisitor # DocumentCloud +import pysolr from documentcloud.core.pagination import CursorPagination, PageNumberPagination from documentcloud.documents.constants import DATA_KEY_REGEX from documentcloud.documents.models import Document diff --git a/documentcloud/documents/serializers.py b/documentcloud/documents/serializers.py index 4167a214..1b977647 100644 --- a/documentcloud/documents/serializers.py +++ b/documentcloud/documents/serializers.py @@ -850,7 +850,9 @@ class EntityOccurrenceSerializer(serializers.ModelSerializer): def get_occurrences(self, obj): def fix(entity): value = entity.pop("kind", 0) - entity["kind"] = OccurrenceKind(value).name if value in OccurrenceKind.values else value + entity["kind"] = ( + OccurrenceKind(value).name if value in OccurrenceKind.values else value + ) return entity return [fix(e) for e in obj.occurrences] diff --git a/documentcloud/documents/solr.py b/documentcloud/documents/solr.py index 2adb8936..a4faf172 100644 --- a/documentcloud/documents/solr.py +++ b/documentcloud/documents/solr.py @@ -31,12 +31,12 @@ import time # Third Party -import pysolr import requests -from config import celery_app from dateutil.parser import parse # DocumentCloud +import pysolr +from config import celery_app from documentcloud.common import path from documentcloud.common.environment import storage from documentcloud.core.utils import grouper diff --git a/documentcloud/documents/tasks.py b/documentcloud/documents/tasks.py index dbf74e04..e7df6c29 100644 --- a/documentcloud/documents/tasks.py +++ 
b/documentcloud/documents/tasks.py @@ -12,11 +12,11 @@ from datetime import date # Third Party -import pysolr import redis from requests.exceptions import HTTPError, RequestException # DocumentCloud +import pysolr from documentcloud.common.environment import httpsub, storage from documentcloud.core.choices import Language from documentcloud.documents import entity_extraction, modifications, solr diff --git a/documentcloud/documents/tests/test_search.py b/documentcloud/documents/tests/test_search.py index 7c36435e..8491d6f7 100644 --- a/documentcloud/documents/tests/test_search.py +++ b/documentcloud/documents/tests/test_search.py @@ -8,13 +8,13 @@ from unittest.mock import Mock # Third Party -import pysolr import pytest import pytz from furl import furl from luqum.parser import parser # DocumentCloud +import pysolr from documentcloud.documents.choices import Access, Status from documentcloud.documents.models import Document from documentcloud.documents.search import ( diff --git a/documentcloud/documents/views.py b/documentcloud/documents/views.py index dd8c9254..b3df1d15 100644 --- a/documentcloud/documents/views.py +++ b/documentcloud/documents/views.py @@ -18,7 +18,6 @@ # Third Party import environ -import pysolr from django_filters import rest_framework as django_filters from drf_spectacular.openapi import OpenApiParameter from drf_spectacular.utils import OpenApiExample, OpenApiResponse, extend_schema @@ -27,6 +26,7 @@ from rest_flex_fields.utils import split_levels # DocumentCloud +import pysolr from documentcloud.addons.choices import Event from documentcloud.addons.models import AddOnEvent from documentcloud.common.environment import httpsub diff --git a/documentcloud/entities/choices.py b/documentcloud/entities/choices.py index e878bcc7..7e3e88da 100644 --- a/documentcloud/entities/choices.py +++ b/documentcloud/entities/choices.py @@ -15,4 +15,4 @@ def __new__(cls, value, label=None, api=False): # Free and public to all. 
public = 0, _("Public"), True # Visible to both the owner and her organization. - private = 2, _("Private"), True \ No newline at end of file + private = 2, _("Private"), True diff --git a/documentcloud/entities/tests/factories.py b/documentcloud/entities/tests/factories.py index b3c1c2de..febdbde2 100644 --- a/documentcloud/entities/tests/factories.py +++ b/documentcloud/entities/tests/factories.py @@ -1,5 +1,6 @@ # Standard Library +# Standard Library import datetime # Third Party diff --git a/documentcloud/projects/choices.py b/documentcloud/projects/choices.py index 9cedb8a2..742e2a3f 100644 --- a/documentcloud/projects/choices.py +++ b/documentcloud/projects/choices.py @@ -17,4 +17,4 @@ def __new__(cls, value, label=None, api=False): # This collaborator can edit the documents in the project edit = 1, _("Edit"), True # This collaborator can edit the documents and the project itself - admin = 2, _("Admin"), True \ No newline at end of file + admin = 2, _("Admin"), True diff --git a/tasks.py b/tasks.py index 32b8b283..e989c12e 100755 --- a/tasks.py +++ b/tasks.py @@ -31,7 +31,7 @@ def test( slow_switch = "" if slow else '-m "not slow"' warnings = "-e PYTHONWARNINGS=always" if warnings else "" filters = f"-k {keywords}" if keywords else "" - + c.run(f"{COMPOSE_PREFIX} up -d documentcloud_test_solr") c.run( COMPOSE_RUN_OPT_USER.format( opt=f"-e DJANGO_SETTINGS_MODULE=config.settings.test {warnings}", From d090b35381337cc80ec3fbc40d528166005c5fd7 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 14 Apr 2026 18:02:19 -0500 Subject: [PATCH 07/40] Pylint silence --- documentcloud/addons/choices.py | 2 +- documentcloud/common/environment/local/pubsub.py | 15 --------------- .../serverless/tests/test_error_handling.py | 2 +- documentcloud/core/choices.py | 2 +- documentcloud/documents/choices.py | 8 ++++---- documentcloud/entities/choices.py | 2 +- documentcloud/projects/choices.py | 1 + 7 files changed, 9 
insertions(+), 23 deletions(-) diff --git a/documentcloud/addons/choices.py b/documentcloud/addons/choices.py index f2a8e1d9..5021e369 100644 --- a/documentcloud/addons/choices.py +++ b/documentcloud/addons/choices.py @@ -11,7 +11,7 @@ def __new__(cls, value, label=None, api=False): obj._label_ = label obj.api = api return obj - + # pylint:disable = invalid-name disabled = 0, _("Disabled"), True hourly = 1, _("Hourly"), True daily = 2, _("Daily"), True diff --git a/documentcloud/common/environment/local/pubsub.py b/documentcloud/common/environment/local/pubsub.py index 5970808d..1af7e3e3 100644 --- a/documentcloud/common/environment/local/pubsub.py +++ b/documentcloud/common/environment/local/pubsub.py @@ -128,14 +128,6 @@ def finish_import_task(data): return finish_import_process.delay(data) - -def sidekick_preprocess_task(data): - # DocumentCloud - from documentcloud.sidekick.tasks import sidekick_preprocess - - return sidekick_preprocess.delay(data) - - def retry_errors_task(data): # DocumentCloud from documentcloud.documents.tasks import retry_errors_local @@ -191,13 +183,6 @@ def retry_errors_task(data): ("documentcloud", env.str("FINISH_IMPORT_TOPIC", default="finish-import")), finish_import_task, ) -publisher.register_internal_callback( - ( - "documentcloud", - env.str("SIDEKICK_PREPROCESS_TOPIC", default="sidekick-preprocess-topic"), - ), - sidekick_preprocess_task, -) publisher.register_internal_callback( ("documentcloud", env.str("RETRY_ERROR_TOPIC", default="retry-error-topic")), retry_errors_task, diff --git a/documentcloud/common/serverless/tests/test_error_handling.py b/documentcloud/common/serverless/tests/test_error_handling.py index db26b324..23c0a8eb 100644 --- a/documentcloud/common/serverless/tests/test_error_handling.py +++ b/documentcloud/common/serverless/tests/test_error_handling.py @@ -11,6 +11,7 @@ # Third Party import pytest +from sharedmock.mock import SharedMock # DocumentCloud from documentcloud.common import redis_fields @@ -26,7 
+27,6 @@ StorageHandler, Workspace, ) -from sharedmock.mock import SharedMock # Since redis is used in the SharedMock calls, it needs to be pickle-able # in order to be sent across the process boundary. FakeRedis and Mock's both diff --git a/documentcloud/core/choices.py b/documentcloud/core/choices.py index 337e16bc..94ef1ea2 100644 --- a/documentcloud/core/choices.py +++ b/documentcloud/core/choices.py @@ -11,7 +11,7 @@ def __new__(cls, value, label=None, ocr_code=None): obj._label_ = label obj.ocr_code = ocr_code if ocr_code is not None else value return obj - + # pylint: disable = invalid-name afrikaans = "afr", _("Afrikaans") amharic = "amh", _("Amharic") arabic = "ara", _("Arabic") diff --git a/documentcloud/documents/choices.py b/documentcloud/documents/choices.py index fd0dec5e..38a9fdad 100644 --- a/documentcloud/documents/choices.py +++ b/documentcloud/documents/choices.py @@ -11,7 +11,7 @@ def __new__(cls, value, label=None, api=False): obj._label_ = label obj.api = api return obj - + # pylint: disable = invalid-name # Free and public to all. public = 0, _("Public"), True # Visible to both the owner and her organization. 
@@ -30,7 +30,7 @@ def __new__(cls, value, label=None, api=False): obj._label_ = label obj.api = api return obj - + # pylint: disable = invalid-name # The document is in a normal successful state success = 0, _("Success"), True # The document is processing, but readable during the operation @@ -53,7 +53,7 @@ def __new__(cls, value, label=None, api=False): obj._label_ = label obj.api = api return obj - + # pylint: disable = invalid-name unknown = 0, _("Unknown"), True person = 1, _("Person"), True location = 2, _("Location"), True @@ -77,7 +77,7 @@ def __new__(cls, value, label=None, api=False): obj._label_ = label obj.api = api return obj - + # pylint: disable = invalid-name unknown = 0, _("Unknown"), True proper = 1, _("Proper"), True common = 2, _("Common"), True diff --git a/documentcloud/entities/choices.py b/documentcloud/entities/choices.py index 7e3e88da..f3109c9f 100644 --- a/documentcloud/entities/choices.py +++ b/documentcloud/entities/choices.py @@ -11,7 +11,7 @@ def __new__(cls, value, label=None, api=False): obj._label_ = label obj.api = api return obj - + # pylint:disable=invalid-name # Free and public to all. public = 0, _("Public"), True # Visible to both the owner and her organization. 
diff --git a/documentcloud/projects/choices.py b/documentcloud/projects/choices.py index 742e2a3f..d6b4f423 100644 --- a/documentcloud/projects/choices.py +++ b/documentcloud/projects/choices.py @@ -12,6 +12,7 @@ def __new__(cls, value, label=None, api=False): obj.api = api return obj + # pylint:disable = invalid-name # This collaborator has read access view = 0, _("View"), True # This collaborator can edit the documents in the project From 53323de945263d356d4f5bc063a709067ae773e6 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 14 Apr 2026 18:16:48 -0500 Subject: [PATCH 08/40] Bump python in workflow --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cf291c04..6e06259e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,7 +9,7 @@ on: - staging env: - PYENV_VERSION: '3.10' + PYENV_VERSION: '3.12' DJANGO_SECRET_KEY: ${{secrets.SECRET_KEY}} DJANGO_SETTINGS_MODULE: config.settings.test DATABASE_URL: postgres://${PGUSER}:${PGPASSWORD}@127.0.0.1:5432/test @@ -63,7 +63,7 @@ jobs: submodules: recursive - uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.12" cache: "pip" # caching pip dependencies - name: Install pip @@ -122,7 +122,7 @@ jobs: - name: Install Python uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.12" cache: "pip" # caching pip dependencies - name: Install pip From 9e0aeec43265260c403105f2958fb2993eb1b5a3 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 14 Apr 2026 19:01:19 -0500 Subject: [PATCH 09/40] Pylint complains about sorting isort won't fix, so let's ignore it --- .pylintrc | 7 +++-- documentcloud/addons/choices.py | 1 + .../common/environment/local/pubsub.py | 1 + .../serverless/tests/test_error_handling.py | 2 +- 
documentcloud/core/choices.py | 1 + documentcloud/documents/choices.py | 4 +++ .../documents/models/saved_search.py | 1 + .../documents/tests/test_saved_searches.py | 1 + documentcloud/entities/choices.py | 1 + requirements/local.in | 2 +- requirements/local.txt | 27 +++++++------------ 11 files changed, 24 insertions(+), 24 deletions(-) diff --git a/.pylintrc b/.pylintrc index 151fce40..69a4dd17 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,7 +1,6 @@ [MASTER] -# https://stackoverflow.com/a/39207275/10952222 -init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.join(os.path.dirname(find_pylintrc()), 'documentcloud'))" -load-plugins=pylint_django, pylint_celery +init-hook="import sys, os; sys.path.insert(0, '/app'); os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.local')" +load-plugins=pylint_django ignore=migrations [FORMAT] @@ -11,7 +10,7 @@ max-args=6 [MESSAGES CONTROL] enable=useless-suppression -disable=missing-docstring,too-many-ancestors,too-few-public-methods,no-else-return,no-member,attribute-defined-outside-init,similarities,import-outside-toplevel,cyclic-import,raise-missing-from,django-not-configured +disable=missing-docstring,too-many-ancestors,too-few-public-methods,no-else-return,no-member,attribute-defined-outside-init,similarities,import-outside-toplevel,cyclic-import,raise-missing-from,django-not-configured,wrong-import-order,wrong-import-position [TYPECHECK] generated-members=REQUEST,acl_users,aq_parent,"[a-zA-Z]+_set{1,2}",save,delete diff --git a/documentcloud/addons/choices.py b/documentcloud/addons/choices.py index 5021e369..c4d2094a 100644 --- a/documentcloud/addons/choices.py +++ b/documentcloud/addons/choices.py @@ -11,6 +11,7 @@ def __new__(cls, value, label=None, api=False): obj._label_ = label obj.api = api return obj + # pylint:disable = invalid-name disabled = 0, _("Disabled"), True hourly = 1, _("Hourly"), True diff --git a/documentcloud/common/environment/local/pubsub.py 
b/documentcloud/common/environment/local/pubsub.py index 1af7e3e3..170ddabf 100644 --- a/documentcloud/common/environment/local/pubsub.py +++ b/documentcloud/common/environment/local/pubsub.py @@ -128,6 +128,7 @@ def finish_import_task(data): return finish_import_process.delay(data) + def retry_errors_task(data): # DocumentCloud from documentcloud.documents.tasks import retry_errors_local diff --git a/documentcloud/common/serverless/tests/test_error_handling.py b/documentcloud/common/serverless/tests/test_error_handling.py index 23c0a8eb..db26b324 100644 --- a/documentcloud/common/serverless/tests/test_error_handling.py +++ b/documentcloud/common/serverless/tests/test_error_handling.py @@ -11,7 +11,6 @@ # Third Party import pytest -from sharedmock.mock import SharedMock # DocumentCloud from documentcloud.common import redis_fields @@ -27,6 +26,7 @@ StorageHandler, Workspace, ) +from sharedmock.mock import SharedMock # Since redis is used in the SharedMock calls, it needs to be pickle-able # in order to be sent across the process boundary. FakeRedis and Mock's both diff --git a/documentcloud/core/choices.py b/documentcloud/core/choices.py index 94ef1ea2..21fd5c18 100644 --- a/documentcloud/core/choices.py +++ b/documentcloud/core/choices.py @@ -11,6 +11,7 @@ def __new__(cls, value, label=None, ocr_code=None): obj._label_ = label obj.ocr_code = ocr_code if ocr_code is not None else value return obj + # pylint: disable = invalid-name afrikaans = "afr", _("Afrikaans") amharic = "amh", _("Amharic") diff --git a/documentcloud/documents/choices.py b/documentcloud/documents/choices.py index 38a9fdad..78a3e428 100644 --- a/documentcloud/documents/choices.py +++ b/documentcloud/documents/choices.py @@ -11,6 +11,7 @@ def __new__(cls, value, label=None, api=False): obj._label_ = label obj.api = api return obj + # pylint: disable = invalid-name # Free and public to all. 
public = 0, _("Public"), True @@ -30,6 +31,7 @@ def __new__(cls, value, label=None, api=False): obj._label_ = label obj.api = api return obj + # pylint: disable = invalid-name # The document is in a normal successful state success = 0, _("Success"), True @@ -53,6 +55,7 @@ def __new__(cls, value, label=None, api=False): obj._label_ = label obj.api = api return obj + # pylint: disable = invalid-name unknown = 0, _("Unknown"), True person = 1, _("Person"), True @@ -77,6 +80,7 @@ def __new__(cls, value, label=None, api=False): obj._label_ = label obj.api = api return obj + # pylint: disable = invalid-name unknown = 0, _("Unknown"), True proper = 1, _("Proper"), True diff --git a/documentcloud/documents/models/saved_search.py b/documentcloud/documents/models/saved_search.py index b5850230..f071a2e2 100644 --- a/documentcloud/documents/models/saved_search.py +++ b/documentcloud/documents/models/saved_search.py @@ -3,6 +3,7 @@ from django.db import models from django.utils.translation import gettext_lazy as _ +# Standard Library from uuid import uuid4 # DocumentCloud diff --git a/documentcloud/documents/tests/test_saved_searches.py b/documentcloud/documents/tests/test_saved_searches.py index 5e7100c3..74889c1a 100644 --- a/documentcloud/documents/tests/test_saved_searches.py +++ b/documentcloud/documents/tests/test_saved_searches.py @@ -2,6 +2,7 @@ # Django from rest_framework import status +# Third Party import pytest # DocumentCloud diff --git a/documentcloud/entities/choices.py b/documentcloud/entities/choices.py index f3109c9f..25d51808 100644 --- a/documentcloud/entities/choices.py +++ b/documentcloud/entities/choices.py @@ -11,6 +11,7 @@ def __new__(cls, value, label=None, api=False): obj._label_ = label obj.api = api return obj + # pylint:disable=invalid-name # Free and public to all. 
public = 0, _("Public"), True diff --git a/requirements/local.in b/requirements/local.in index 28f47dbb..784fd276 100644 --- a/requirements/local.in +++ b/requirements/local.in @@ -21,7 +21,7 @@ flake8 coverage black pylint-django -pylint-celery +pylint>=3,<4 # Django # ------------------------------------------------------------------------------ diff --git a/requirements/local.txt b/requirements/local.txt index fb84ef44..0e77b19f 100644 --- a/requirements/local.txt +++ b/requirements/local.txt @@ -51,10 +51,8 @@ asgiref==3.11.1 # via # -r requirements/base.txt # django -astroid==2.11.7 - # via - # pylint - # pylint-celery +astroid==3.3.11 + # via pylint asttokens==2.0.5 # via # -r requirements/base.txt @@ -177,7 +175,7 @@ deprecated==1.3.1 # via # -r requirements/base.txt # pikepdf -dill==0.3.5.1 +dill==0.4.1 # via pylint django==5.2.13 # via @@ -393,7 +391,7 @@ ipython==8.10.0 # via # -r requirements/base.txt # ipdb -isort==4.3.21 +isort==6.1.0 # via pylint jedi==0.18.1 # via @@ -415,8 +413,6 @@ kombu==5.4.2 # via # -r requirements/base.txt # celery -lazy-object-proxy==1.4.2 - # via astroid levenshtein==0.27.3 # via python-levenshtein license-expression==30.4.4 @@ -622,19 +618,15 @@ pyjwt==2.12.1 # -r requirements/base.txt # djangorestframework-simplejwt # social-auth-core -pylint==2.14.5 +pylint==3.3.9 # via - # pylint-celery + # -r requirements/local.in # pylint-django # pylint-plugin-utils -pylint-celery==0.3 +pylint-django==2.7.0 # via -r requirements/local.in -pylint-django==2.5.3 - # via -r requirements/local.in -pylint-plugin-utils==0.7 - # via - # pylint-celery - # pylint-django +pylint-plugin-utils==0.9.0 + # via pylint-django pymupdf==1.25.3 # via -r requirements/base.txt pyparsing==3.3.2 @@ -869,7 +861,6 @@ wrapt==1.17.3 # via # -r requirements/base.txt # aiobotocore - # astroid # deprecated # smart-open yarl==1.23.0 From 6dadb2c20c25c73744631ca8a90e8a875dfa4229 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck 
<102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 14 Apr 2026 19:44:48 -0500 Subject: [PATCH 10/40] almost there --- documentcloud/addons/models.py | 2 +- documentcloud/addons/tasks.py | 2 +- documentcloud/common/serverless/utils.py | 2 +- documentcloud/core/versioning.py | 2 +- documentcloud/documents/processing/info_and_image/main.py | 3 +-- documentcloud/documents/processing/info_and_image/pdfium.py | 4 ++-- documentcloud/documents/processing/ocr/main.py | 2 +- documentcloud/documents/processing/ocr/tess.py | 2 +- documentcloud/documents/processing/tests/imagediff.py | 2 +- .../documents/processing/tests/pipeline_tests/mocks.py | 2 +- documentcloud/documents/tests/test_modifications.py | 2 +- documentcloud/documents/tests/test_search.py | 2 +- 12 files changed, 13 insertions(+), 14 deletions(-) diff --git a/documentcloud/addons/models.py b/documentcloud/addons/models.py index 7bdc70e1..c5111939 100644 --- a/documentcloud/addons/models.py +++ b/documentcloud/addons/models.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) - +# pylint:disable=too-many-positional-arguments class AddOn(models.Model): objects = AddOnQuerySet.as_manager() diff --git a/documentcloud/addons/tasks.py b/documentcloud/addons/tasks.py index bbd203a3..f76383c6 100644 --- a/documentcloud/addons/tasks.py +++ b/documentcloud/addons/tasks.py @@ -61,7 +61,7 @@ def set_run_status(uuid): # if we are not in a completed status, continue polling for new status set_run_status.apply_async(args=[uuid], countdown=5) - +# pylint:disable=too-many-positional-arguments @shared_task def dispatch(addon_id, uuid, user_id, documents, query, parameters, event_id=None): # pylint: disable=too-many-arguments diff --git a/documentcloud/common/serverless/utils.py b/documentcloud/common/serverless/utils.py index 6bed1376..00782dcd 100644 --- a/documentcloud/common/serverless/utils.py +++ b/documentcloud/common/serverless/utils.py @@ -21,7 +21,7 @@ env = environ.Env() -# pylint: disable=import-error 
+# pylint: disable=import-error, too-many-positional-arguments if not env.str("ENVIRONMENT").startswith("local"): # in production, log errors to sentry diff --git a/documentcloud/core/versioning.py b/documentcloud/core/versioning.py index 2c459b3e..a743d585 100644 --- a/documentcloud/core/versioning.py +++ b/documentcloud/core/versioning.py @@ -4,7 +4,7 @@ class QueryParameterVersioning(versioning.QueryParameterVersioning): - # pylint: disable=redefined-builtin + # pylint: disable=redefined-builtin, too-many-positional-arguments def reverse( self, viewname, args=None, kwargs=None, request=None, format=None, **extra ): diff --git a/documentcloud/documents/processing/info_and_image/main.py b/documentcloud/documents/processing/info_and_image/main.py index 10cd414a..45714395 100755 --- a/documentcloud/documents/processing/info_and_image/main.py +++ b/documentcloud/documents/processing/info_and_image/main.py @@ -31,7 +31,7 @@ # remove this when done with import code # pylint: disable=too-many-lines -# pylint: disable=import-error +# pylint: disable=import-error, too-many-positional-arguments # Imports based on execution context if env.str("ENVIRONMENT").startswith("local"): @@ -327,7 +327,6 @@ def redact_document_and_overwrite(doc_id, slug, access, redactions): def get_redis_pagespec(doc_id): """Get the dimensions of all pages in a convenient format using Redis""" - # pylint: disable=too-many-nested-blocks dimensions_field = redis_fields.dimensions(doc_id) pipeline = REDIS.pipeline() diff --git a/documentcloud/documents/processing/info_and_image/pdfium.py b/documentcloud/documents/processing/info_and_image/pdfium.py index 03b9c05e..6f11790e 100755 --- a/documentcloud/documents/processing/info_and_image/pdfium.py +++ b/documentcloud/documents/processing/info_and_image/pdfium.py @@ -33,7 +33,7 @@ INT_MAX = 2147483647 # Adapted from https://github.com/gersonkurz/pydfium - +# pylint: disable=too-many-positional-arguments class FPDFLibraryConfig(Structure): _fields_ = [ @@ 
-129,7 +129,7 @@ def get_image(self): img = PIL.Image.frombuffer( "RGBA", (self.width, self.height), bitmap.contents, "raw", "RGBA", 0, 1 ) - # pylint: disable=invalid-name, unbalanced-tuple-unpacking + # pylint: disable=unbalanced-tuple-unpacking b, g, r, _a = img.split() img = PIL.Image.merge("RGB", (r, g, b)) return img diff --git a/documentcloud/documents/processing/ocr/main.py b/documentcloud/documents/processing/ocr/main.py index aac04c38..db391b7a 100755 --- a/documentcloud/documents/processing/ocr/main.py +++ b/documentcloud/documents/processing/ocr/main.py @@ -18,7 +18,7 @@ logger = logging.getLogger() logger.setLevel(logging.INFO) -# pylint: disable=import-error +# pylint: disable=import-error, too-many-positional-arguments # Imports based on execution context if env.str("ENVIRONMENT").startswith("local"): diff --git a/documentcloud/documents/processing/ocr/tess.py b/documentcloud/documents/processing/ocr/tess.py index c47b48f8..3363fe61 100755 --- a/documentcloud/documents/processing/ocr/tess.py +++ b/documentcloud/documents/processing/ocr/tess.py @@ -16,7 +16,7 @@ LIB_PATH = os.path.join(script_dir, "tesseract/libtesseract.so.5") DATA_PATH = TMP_DIRECTORY - +# pylint:disable = too-many-positional-arguments class TesseractError(Exception): pass diff --git a/documentcloud/documents/processing/tests/imagediff.py b/documentcloud/documents/processing/tests/imagediff.py index d065398d..e9dea833 100755 --- a/documentcloud/documents/processing/tests/imagediff.py +++ b/documentcloud/documents/processing/tests/imagediff.py @@ -18,7 +18,7 @@ # Local from .report_generator import ReportGenerator - +# pylint: disable=too-many-positional-arguments def same_images( test_image: str, expected_image: str, diff --git a/documentcloud/documents/processing/tests/pipeline_tests/mocks.py b/documentcloud/documents/processing/tests/pipeline_tests/mocks.py index d9d0d3a8..ef933c6d 100644 --- a/documentcloud/documents/processing/tests/pipeline_tests/mocks.py +++ 
b/documentcloud/documents/processing/tests/pipeline_tests/mocks.py @@ -46,7 +46,7 @@ def patch_env(env): # Mock methods -# pylint: disable=unused-argument +# pylint: disable=unused-argument, too-many-positional-arguments def page_loaded(page): pass diff --git a/documentcloud/documents/tests/test_modifications.py b/documentcloud/documents/tests/test_modifications.py index 9f284cb3..7bd32125 100644 --- a/documentcloud/documents/tests/test_modifications.py +++ b/documentcloud/documents/tests/test_modifications.py @@ -29,7 +29,7 @@ def send_post_process(document, modifications): class TestPostProcess: """Test the page modification post processing""" - # pylint: disable=too-many-arguments + # pylint: disable=too-many-arguments, too-many-positional-arguments @pytest.mark.parametrize("factory,attr", models) @pytest.mark.parametrize( "page_spec,initial_page,final_page,count,page_count", tests diff --git a/documentcloud/documents/tests/test_search.py b/documentcloud/documents/tests/test_search.py index 8491d6f7..54b4b63b 100644 --- a/documentcloud/documents/tests/test_search.py +++ b/documentcloud/documents/tests/test_search.py @@ -44,7 +44,7 @@ from documentcloud.users.models import User from documentcloud.users.tests.factories import UserFactory -# pylint: disable=too-many-public-methods +# pylint: disable=too-many-public-methods, too-many-positional-arguments @pytest.fixture(scope="class") From ac6a5bcec01f240d7eeedad826a18f2b2d62a539 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 14 Apr 2026 19:48:21 -0500 Subject: [PATCH 11/40] Remove get-pip from workflow, outdated --- .github/workflows/test.yml | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6e06259e..108b246a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,5 +1,4 @@ name: CI - on: workflow_dispatch: 
pull_request: @@ -7,7 +6,6 @@ on: branches: - master - staging - env: PYENV_VERSION: '3.12' DJANGO_SECRET_KEY: ${{secrets.SECRET_KEY}} @@ -38,15 +36,10 @@ env: DOCUMENT_BUCKET: ${{secrets.DOCUMENT_BUCKET}} REDIS_PROCESSING_PASSWORD: "" SAM_CLI_TELEMETRY: 0 - - - - jobs: check: runs-on: ubuntu-latest - services: redis: image: redis @@ -65,28 +58,17 @@ jobs: with: python-version: "3.12" cache: "pip" # caching pip dependencies - - - name: Install pip - run: | - wget https://bootstrap.pypa.io/pip/3.6/get-pip.py - python3 get-pip.py - - name: Install run: | pip install -r requirements/local.txt - - name: Lint run: pylint documentcloud - - name: Isort run: isort --check-only -rc documentcloud - - name: Formatting run: black --check documentcloud --exclude migrations - test: runs-on: ubuntu-latest - services: redis: image: redis @@ -97,7 +79,6 @@ jobs: --health-interval 10s --health-timeout 5s --health-retries 5 - postgres: image: postgres env: @@ -113,33 +94,21 @@ jobs: ports: # Maps tcp port 5432 on service container to the host - 5432:5432 - steps: - - name: Checkout uses: actions/checkout@v4 - - name: Install Python uses: actions/setup-python@v5 with: python-version: "3.12" cache: "pip" # caching pip dependencies - - - name: Install pip - run: | - wget https://bootstrap.pypa.io/pip/3.6/get-pip.py - python3 get-pip.py - - name: Install requirements run: | pip install -r requirements/local.txt - - - name: Test run: pytest documentcloud -m "not solr" env: # use the credentials for the service container PG_USER: test PG_PASSWORD: ${{ secrets.PG_PASSWORD }} - DATABASE_URL: postgres://test:postgres@127.0.0.1:5432/test - + DATABASE_URL: postgres://test:postgres@127.0.0.1:5432/test \ No newline at end of file From a232f4153cb868665e9766945d468d8711708dc2 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Mon, 20 Apr 2026 15:29:01 -0500 Subject: [PATCH 12/40] Resolve checks --- .github/workflows/test.yml | 4 ++-- 
documentcloud/addons/models.py | 14 +++++++++++--- documentcloud/addons/tasks.py | 1 + documentcloud/addons/views.py | 13 +++++++------ .../common/environment/aws/processing_token.py | 2 +- documentcloud/common/environment/aws/storage.py | 2 +- documentcloud/common/environment/gcp/httpsub.py | 2 +- documentcloud/documents/models/document.py | 1 + .../documents/processing/info_and_image/main.py | 1 + .../documents/processing/info_and_image/pdfium.py | 1 + documentcloud/documents/processing/ocr/tess.py | 1 + .../documents/processing/tests/imagediff.py | 1 + documentcloud/documents/search.py | 4 ++-- documentcloud/documents/solr.py | 2 +- documentcloud/documents/tasks.py | 2 +- documentcloud/documents/tests/test_search.py | 2 +- documentcloud/documents/views.py | 2 +- 17 files changed, 35 insertions(+), 20 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 108b246a..c4fff926 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -64,7 +64,7 @@ jobs: - name: Lint run: pylint documentcloud - name: Isort - run: isort --check-only -rc documentcloud + run: isort --check-only --diff documentcloud - name: Formatting run: black --check documentcloud --exclude migrations test: @@ -111,4 +111,4 @@ jobs: # use the credentials for the service container PG_USER: test PG_PASSWORD: ${{ secrets.PG_PASSWORD }} - DATABASE_URL: postgres://test:postgres@127.0.0.1:5432/test \ No newline at end of file + DATABASE_URL: postgres://test:postgres@127.0.0.1:5432/test diff --git a/documentcloud/addons/models.py b/documentcloud/addons/models.py index c5111939..ef7cfc5d 100644 --- a/documentcloud/addons/models.py +++ b/documentcloud/addons/models.py @@ -32,6 +32,7 @@ logger = logging.getLogger(__name__) + # pylint:disable=too-many-positional-arguments class AddOn(models.Model): objects = AddOnQuerySet.as_manager() @@ -189,6 +190,7 @@ def dispatch(self, uuid, user, documents, query, parameters, event_id): f"{self.api_url}/dispatches", 
headers=self.api_headers, json={"event_type": self.name, "client_payload": payload}, + timeout=10, ) resp.raise_for_status() @@ -197,6 +199,7 @@ def update_config(self): resp = requests.get( f"{self.api_url}/contents/config.yaml", headers={**self.api_headers, "Accept": "application/vnd.github.v3.raw"}, + timeout=10, ) if resp.status_code == 404: self.error = True @@ -380,7 +383,7 @@ def find_run_id(self): url = f"{self.addon.api_url}/actions/runs?created=%3E{date_filter}" while url is not None: logger.info("[FIND RUN ID] get %s", url) - resp = requests.get(url, headers=self.addon.api_headers) + resp = requests.get(url, headers=self.addon.api_headers, timeout=10) resp.raise_for_status() url = resp.links.get("next", {}).get("url") resp_json = resp.json() @@ -403,7 +406,9 @@ def find_run_id(self): else: continue - resp = requests.get(jobs_url, headers=self.addon.api_headers) + resp = requests.get( + jobs_url, headers=self.addon.api_headers, timeout=10 + ) resp.raise_for_status() jobs = resp.json()["jobs"] @@ -435,6 +440,7 @@ def set_status(self): resp = requests.get( f"{self.addon.api_url}/actions/runs/{self.run_id}", headers=self.addon.api_headers, + timeout=10, ) if resp.status_code != 200: logger.info( @@ -449,7 +455,7 @@ def set_status(self): # if we failed, check the job status to check for 'cancelled' # which means it timed out resp = requests.get( - resp.json()["jobs_url"], headers=self.addon.api_headers + resp.json()["jobs_url"], headers=self.addon.api_headers, timeout=10 ) if resp.status_code == 200 and len(resp.json()["jobs"]) > 0: status = resp.json()["jobs"][0]["conclusion"] @@ -510,6 +516,7 @@ def cancel(self): resp = requests.post( f"{self.addon.api_url}/actions/runs/{self.run_id}/cancel", headers=self.addon.api_headers, + timeout=10, ) if resp.status_code == 202: return "succeed" @@ -718,6 +725,7 @@ def token(self): "https://api.github.com/app/installations/" f"{self.iid}/access_tokens", headers=headers, + timeout=10, ) resp = resp.json() token = 
resp["token"] diff --git a/documentcloud/addons/tasks.py b/documentcloud/addons/tasks.py index f76383c6..d2d4c5b9 100644 --- a/documentcloud/addons/tasks.py +++ b/documentcloud/addons/tasks.py @@ -61,6 +61,7 @@ def set_run_status(uuid): # if we are not in a completed status, continue polling for new status set_run_status.apply_async(args=[uuid], countdown=5) + # pylint:disable=too-many-positional-arguments @shared_task def dispatch(addon_id, uuid, user_id, documents, query, parameters, event_id=None): diff --git a/documentcloud/addons/views.py b/documentcloud/addons/views.py index 5247e263..ce5ad84c 100644 --- a/documentcloud/addons/views.py +++ b/documentcloud/addons/views.py @@ -1004,6 +1004,7 @@ def verify_signature(request): uid=data["sender"]["id"], defaults={"name": data["sender"]["login"]} ) if data.get("action") in ["added", "created"]: + repos = [] logger.info("[GITHUB WEBHOOK] %s", data["action"]) installation, _created = GitHubInstallation.objects.get_or_create( iid=data["installation"]["id"], @@ -1022,11 +1023,11 @@ def verify_signature(request): with transaction.atomic(): AddOn.objects.update_or_create( repository=repo["full_name"], - defaults=dict( - github_account=acct, - github_installation=installation, - removed=False, - ), + defaults={ + "github_account": acct, + "github_installation": installation, + "removed": False, + }, ) transaction.on_commit( lambda r=repo: update_config.delay(r["full_name"]) @@ -1165,7 +1166,7 @@ def get(self, request, *args, **kwargs): url += "/" url += kwargs.get("path", "") - response = requests.get(url) + response = requests.get(url, timeout=(10, 30)) return HttpResponse( content=response.content, status=response.status_code, diff --git a/documentcloud/common/environment/aws/processing_token.py b/documentcloud/common/environment/aws/processing_token.py index 739f9d02..69c0068b 100644 --- a/documentcloud/common/environment/aws/processing_token.py +++ b/documentcloud/common/environment/aws/processing_token.py @@ -17,7 
+17,7 @@ def authenticate_token(*args, **kwargs): headers = event["headers"] if headers.get(AUTHORIZATION) != f"processing-token {PROCESSING_TOKEN}": - raise Exception("Authentication Failed.") + raise PermissionError("Authentication Failed.") # If all passes, auth succeeded return func(*args, **kwargs) diff --git a/documentcloud/common/environment/aws/storage.py b/documentcloud/common/environment/aws/storage.py index 812baab8..1a480b02 100644 --- a/documentcloud/common/environment/aws/storage.py +++ b/documentcloud/common/environment/aws/storage.py @@ -196,7 +196,7 @@ def exists(self, file_name): def fetch_url(self, url, file_name, access, auth=None): with self.open(file_name, "wb", access=access) as out_file, requests.get( - url, stream=True, auth=auth + url, stream=True, auth=auth, timeout=(10, 60) ) as response: response.raise_for_status() for chunk in response.iter_content(chunk_size=10 * 1024 * 1024): diff --git a/documentcloud/common/environment/gcp/httpsub.py b/documentcloud/common/environment/gcp/httpsub.py index f37ba4ab..02a7040a 100644 --- a/documentcloud/common/environment/gcp/httpsub.py +++ b/documentcloud/common/environment/gcp/httpsub.py @@ -1,4 +1,4 @@ -# pylint: disable=unused-import +# pylint: disable=unused-import, import-error # Third Party from common import session as httpsub diff --git a/documentcloud/documents/models/document.py b/documentcloud/documents/models/document.py index 441a6ae7..50563a45 100644 --- a/documentcloud/documents/models/document.py +++ b/documentcloud/documents/models/document.py @@ -744,6 +744,7 @@ def invalidate_cache(self): "X-Auth-Email": cloudflare_email, "X-Auth-Key": cloudflare_key, }, + timeout=10, ) def index_on_commit(self, **kwargs): diff --git a/documentcloud/documents/processing/info_and_image/main.py b/documentcloud/documents/processing/info_and_image/main.py index 45714395..7c198246 100755 --- a/documentcloud/documents/processing/info_and_image/main.py +++ 
b/documentcloud/documents/processing/info_and_image/main.py @@ -58,6 +58,7 @@ Workspace, ) else: + # fmt: off # Third Party import graft # only initialize sentry on serverless diff --git a/documentcloud/documents/processing/info_and_image/pdfium.py b/documentcloud/documents/processing/info_and_image/pdfium.py index 6f11790e..12f81d85 100755 --- a/documentcloud/documents/processing/info_and_image/pdfium.py +++ b/documentcloud/documents/processing/info_and_image/pdfium.py @@ -35,6 +35,7 @@ # Adapted from https://github.com/gersonkurz/pydfium # pylint: disable=too-many-positional-arguments + class FPDFLibraryConfig(Structure): _fields_ = [ ("version", c_int), diff --git a/documentcloud/documents/processing/ocr/tess.py b/documentcloud/documents/processing/ocr/tess.py index 3363fe61..a2375afd 100755 --- a/documentcloud/documents/processing/ocr/tess.py +++ b/documentcloud/documents/processing/ocr/tess.py @@ -16,6 +16,7 @@ LIB_PATH = os.path.join(script_dir, "tesseract/libtesseract.so.5") DATA_PATH = TMP_DIRECTORY + # pylint:disable = too-many-positional-arguments class TesseractError(Exception): pass diff --git a/documentcloud/documents/processing/tests/imagediff.py b/documentcloud/documents/processing/tests/imagediff.py index e9dea833..a6833b62 100755 --- a/documentcloud/documents/processing/tests/imagediff.py +++ b/documentcloud/documents/processing/tests/imagediff.py @@ -18,6 +18,7 @@ # Local from .report_generator import ReportGenerator + # pylint: disable=too-many-positional-arguments def same_images( test_image: str, diff --git a/documentcloud/documents/search.py b/documentcloud/documents/search.py index 3c84e469..a7c28cf6 100644 --- a/documentcloud/documents/search.py +++ b/documentcloud/documents/search.py @@ -9,12 +9,12 @@ from datetime import datetime # Third Party +import pysolr from luqum.parser import ParseError, parser from luqum.tree import BaseOperation, Boost, Group, Not, Prohibit, Range, Unary, Word from luqum.utils import LuceneTreeTransformer, 
LuceneTreeVisitor # DocumentCloud -import pysolr from documentcloud.core.pagination import CursorPagination, PageNumberPagination from documentcloud.documents.constants import DATA_KEY_REGEX from documentcloud.documents.models import Document @@ -132,6 +132,7 @@ def search(user, query_params): "hl.weightMatches": settings.SOLR_HL_WEIGHT_MATCHES, **page_query_data, } + original_text_query = text_query if ( settings.SOLR_QUERY_NOTES and user.is_authenticated @@ -140,7 +141,6 @@ def search(user, query_params): ): # turn note queries on for all pro users # *:* returns all documents, do not enable note queries - original_text_query = text_query text_query = _add_note_query(text_query, user) kwargs["uf"] = "* _query_ -projects_edit_access" query_notes = True diff --git a/documentcloud/documents/solr.py b/documentcloud/documents/solr.py index a4faf172..58fdab09 100644 --- a/documentcloud/documents/solr.py +++ b/documentcloud/documents/solr.py @@ -31,11 +31,11 @@ import time # Third Party +import pysolr import requests from dateutil.parser import parse # DocumentCloud -import pysolr from config import celery_app from documentcloud.common import path from documentcloud.common.environment import storage diff --git a/documentcloud/documents/tasks.py b/documentcloud/documents/tasks.py index e7df6c29..dbf74e04 100644 --- a/documentcloud/documents/tasks.py +++ b/documentcloud/documents/tasks.py @@ -12,11 +12,11 @@ from datetime import date # Third Party +import pysolr import redis from requests.exceptions import HTTPError, RequestException # DocumentCloud -import pysolr from documentcloud.common.environment import httpsub, storage from documentcloud.core.choices import Language from documentcloud.documents import entity_extraction, modifications, solr diff --git a/documentcloud/documents/tests/test_search.py b/documentcloud/documents/tests/test_search.py index 54b4b63b..166bbadf 100644 --- a/documentcloud/documents/tests/test_search.py +++ 
b/documentcloud/documents/tests/test_search.py @@ -8,13 +8,13 @@ from unittest.mock import Mock # Third Party +import pysolr import pytest import pytz from furl import furl from luqum.parser import parser # DocumentCloud -import pysolr from documentcloud.documents.choices import Access, Status from documentcloud.documents.models import Document from documentcloud.documents.search import ( diff --git a/documentcloud/documents/views.py b/documentcloud/documents/views.py index b3df1d15..dd8c9254 100644 --- a/documentcloud/documents/views.py +++ b/documentcloud/documents/views.py @@ -18,6 +18,7 @@ # Third Party import environ +import pysolr from django_filters import rest_framework as django_filters from drf_spectacular.openapi import OpenApiParameter from drf_spectacular.utils import OpenApiExample, OpenApiResponse, extend_schema @@ -26,7 +27,6 @@ from rest_flex_fields.utils import split_levels # DocumentCloud -import pysolr from documentcloud.addons.choices import Event from documentcloud.addons.models import AddOnEvent from documentcloud.common.environment import httpsub From 3c4570ec14a9f7111495a27dcc62074b09695a06 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Mon, 20 Apr 2026 16:59:53 -0500 Subject: [PATCH 13/40] More conflict markers --- config/urls.py | 1 - 1 file changed, 1 deletion(-) diff --git a/config/urls.py b/config/urls.py index 18b3df31..0727eded 100644 --- a/config/urls.py +++ b/config/urls.py @@ -92,7 +92,6 @@ class BulkNestedDefaultRouter(BulkRouterMixin, NestedDefaultRouter): projects_router.register("documents", ProjectMembershipViewSet) projects_router.register("users", CollaborationViewSet) - router.register("documents/search/saved", SavedSearchViewSet, basename="saved_search") urlpatterns = [ From 48e37106184d4520080a64ab0de5cf2e13a6088a Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 21 Apr 2026 11:01:08 -0500 Subject: 
[PATCH 14/40] Remove collectfast --- config/settings/production.py | 7 ++++--- requirements/production.in | 1 - requirements/production.txt | 8 +------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/config/settings/production.py b/config/settings/production.py index 568f72f3..92ba3196 100644 --- a/config/settings/production.py +++ b/config/settings/production.py @@ -147,11 +147,12 @@ class StaticRootS3Boto3Storage(S3Boto3Storage): COMPRESS_STORAGE = "storages.backends.s3boto3.S3Boto3Storage" # https://django-compressor.readthedocs.io/en/latest/settings/#django.conf.settings.COMPRESS_URL COMPRESS_URL = STATIC_URL -# Collectfast + +# Collectfast Remove later # ------------------------------------------------------------------------------ # https://github.com/antonagestam/collectfast#installation -INSTALLED_APPS = ["collectfast"] + INSTALLED_APPS # noqa F405 -COLLECTFAST_STRATEGY = "collectfast.strategies.boto3.Boto3Strategy" +# INSTALLED_APPS = ["collectfast"] + INSTALLED_APPS # noqa F405 +# COLLECTFAST_STRATEGY = "collectfast.strategies.boto3.Boto3Strategy" # LOGGING # ------------------------------------------------------------------------------ diff --git a/requirements/production.in b/requirements/production.in index 8d3921df..e292b828 100644 --- a/requirements/production.in +++ b/requirements/production.in @@ -2,7 +2,6 @@ gunicorn psycopg2 --no-binary psycopg2 -Collectfast sentry-sdk scout-apm diff --git a/requirements/production.txt b/requirements/production.txt index 7ee08655..5c5da4a4 100644 --- a/requirements/production.txt +++ b/requirements/production.txt @@ -118,8 +118,6 @@ click-repl==0.3.0 # via # -r requirements/base.txt # celery -collectfast==2.2.0 - # via -r requirements/production.in cpuprofile==1.0.1 # via -r requirements/base.txt cryptography==46.0.7 @@ -155,7 +153,6 @@ deprecated==1.3.1 django==5.2.13 # via # -r requirements/base.txt - # collectfast # daily-active-users # django-anymail # django-appconf @@ -222,9 +219,7 @@ 
django-redis==5.0.0 django-robots==6.1 # via -r requirements/base.txt django-storages[boto3]==1.14.6 - # via - # -r requirements/production.in - # collectfast + # via -r requirements/production.in djangorestframework==3.16.1 # via # -r requirements/base.txt @@ -646,7 +641,6 @@ typing-extensions==4.15.0 # via # -r requirements/base.txt # aiosignal - # collectfast # grpcio # opentelemetry-api # opentelemetry-sdk From d25f29ac14bac847188f3cc9578b2f42a9383d97 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 21 Apr 2026 12:52:15 -0500 Subject: [PATCH 15/40] Try checking if table already exists and no-op if it does for addons --- .../migrations/0013_user_active_addons.py | 39 +++++++++++++------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/documentcloud/users/migrations/0013_user_active_addons.py b/documentcloud/users/migrations/0013_user_active_addons.py index 6d2ca256..1058a8ac 100644 --- a/documentcloud/users/migrations/0013_user_active_addons.py +++ b/documentcloud/users/migrations/0013_user_active_addons.py @@ -1,21 +1,36 @@ from django.db import migrations, models -class Migration(migrations.Migration): +class Migration(migrations.Migration): dependencies = [ ("users", "0001_initial_squashed_0010_user_bio_alter_user_email_alter_user_username"), ("users", "0007_auto_20211102_1707"), ] - operations = [ - migrations.AddField( - model_name="user", - name="active_addons", - field=models.ManyToManyField( - help_text="Add-Ons shown for this user", - related_name="users", - to="addons.addon", - verbose_name="active add-ons", - ), - ), + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.AddField( + model_name="user", + name="active_addons", + field=models.ManyToManyField( + help_text="Add-Ons shown for this user", + related_name="users", + to="addons.addon", + verbose_name="active add-ons", + ), + ), + ], + database_operations=[ + migrations.RunSQL( + sql=""" + CREATE 
TABLE IF NOT EXISTS users_user_active_addons ( + id serial NOT NULL PRIMARY KEY, + user_id integer NOT NULL REFERENCES users_user(id) DEFERRABLE INITIALLY DEFERRED, + addon_id bigint NOT NULL REFERENCES addons_addon(id) DEFERRABLE INITIALLY DEFERRED + ) + """, + reverse_sql="DROP TABLE IF EXISTS users_user_active_addons", + ), + ], + ) ] \ No newline at end of file From 7262d449681ca367355fae9ea479e13384a60c46 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 21 Apr 2026 13:18:50 -0500 Subject: [PATCH 16/40] Fix language.get_choice --- documentcloud/documents/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/documentcloud/documents/tasks.py b/documentcloud/documents/tasks.py index dbf74e04..d3ac75de 100644 --- a/documentcloud/documents/tasks.py +++ b/documentcloud/documents/tasks.py @@ -135,7 +135,7 @@ def process(document_pk, user_pk, org_pk, force_ocr, ocr_engine): "slug": document.slug, "extension": document.original_extension, "access": document.access, - "ocr_code": Language.get_choice(document.language).ocr_code, + "ocr_code": Language(document.language).ocr_code, "method": "process_pdf", "user_id": user_pk, "org_id": org_pk, @@ -163,7 +163,7 @@ def redact(document_pk, user_pk, redactions): "doc_id": document_pk, "slug": document.slug, "access": document.access, - "ocr_code": Language.get_choice(document.language).ocr_code, + "ocr_code": Language(document.language).ocr_code, "redactions": redactions, }, redact, From 2b8652caf471fc7d69196e8f1559ae8199d162b6 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 23 Apr 2026 09:36:37 -0500 Subject: [PATCH 17/40] Fix package upgrade regressions: smart_open 5.x API migration and Pillow ANTIALIAS removal --- .../common/environment/aws/storage.py | 36 +++++++++++-------- .../processing/info_and_image/main.py | 2 +- 2 files changed, 22 insertions(+), 16 deletions(-) 
diff --git a/documentcloud/common/environment/aws/storage.py b/documentcloud/common/environment/aws/storage.py index 1a480b02..2ac39097 100644 --- a/documentcloud/common/environment/aws/storage.py +++ b/documentcloud/common/environment/aws/storage.py @@ -58,23 +58,29 @@ def size(self, file_name): return bucket.Object(key).content_length def open(self, file_name, mode="rb", content_type=None, access=None): - + # This logic changed with smart_open 5.0 + # https://github.com/piskvorky/smart_open/blob/develop/CHANGELOG.md#500-30-mar-2021 + # See migration guide here: + # https://github.com/piskvorky/smart_open/blob/develop/MIGRATING_FROM_OLDER_VERSIONS.rst + # This is only called locally, serverless lambda has its own stack that uses smart_open 1.8.4 transport_params = { - "resource_kwargs": self.resource_kwargs, - "multipart_upload_kwargs": {}, + "client": self.s3_client, } - - if content_type is None: - # attempt to guess content type if not specified - content_type = mimetypes.guess_type(file_name)[0] - - if content_type is not None: - # set content type if we have one - transport_params["multipart_upload_kwargs"]["ContentType"] = content_type - - if access is not None: - transport_params["multipart_upload_kwargs"]["ACL"] = ACLS[access] - + if "w" in mode: # Setting these kwargs only make sense in a write context + writeable_kwargs = {} + if content_type is None: + # attempt to guess content type if not specified + content_type = mimetypes.guess_type(file_name)[0] + if content_type is not None: + # set content type if we have one + writeable_kwargs["ContentType"] = content_type + if access is not None: + writeable_kwargs["ACL"] = ACLS[access] + if writeable_kwargs: + # Guard against no writeable kwargs provided + transport_params["client_kwargs"] = { + "S3.Client.create_multipart_upload": writeable_kwargs + } return smart_open.open( f"s3://{file_name}", mode, transport_params=transport_params ) diff --git a/documentcloud/documents/processing/info_and_image/main.py 
b/documentcloud/documents/processing/info_and_image/main.py index 7c198246..6290b426 100755 --- a/documentcloud/documents/processing/info_and_image/main.py +++ b/documentcloud/documents/processing/info_and_image/main.py @@ -768,7 +768,7 @@ def extract_single_page(doc_id, slug, access, page, page_number, large_image_pat image_width, max(round(img_buffer.height * (image_width / img_buffer.width)), 1), ), - Image.ANTIALIAS, + Image.LANCZOS, ) mem_file = io.BytesIO() From a66d9102412748f376cfb626fc2f292f0a207977 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 23 Apr 2026 12:44:27 -0500 Subject: [PATCH 18/40] Fix Minio locally (requires) squarelet #621 --- .../management/commands/initialize_minio.py | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 documentcloud/core/management/commands/initialize_minio.py diff --git a/documentcloud/core/management/commands/initialize_minio.py b/documentcloud/core/management/commands/initialize_minio.py new file mode 100644 index 00000000..fd285c89 --- /dev/null +++ b/documentcloud/core/management/commands/initialize_minio.py @@ -0,0 +1,50 @@ +import json +import boto3 +import environ +from botocore.client import Config +from botocore.exceptions import ClientError +from django.core.management.base import BaseCommand + +env = environ.Env() + + +class Command(BaseCommand): + help = "Initialize Minio bucket and policies for local development" + + def handle(self, *args, **options): + if env.str("ENVIRONMENT") != "local-minio": + return + + client = boto3.client( + "s3", + endpoint_url=env.str("MINIO_URL"), + aws_access_key_id=env.str("MINIO_ROOT_USER"), + aws_secret_access_key=env.str("MINIO_ROOT_PASSWORD"), + config=Config(signature_version="s3v4"), + region_name="us-east-1", + ) + + # Create bucket if it doesn't exist + try: + client.head_bucket(Bucket="documents") + self.stdout.write("Bucket already exists") + except ClientError as e: + 
error_code = e.response["Error"]["Code"] + if error_code == "404": # Bucket doesn't exist, create it + client.create_bucket(Bucket="documents") + self.stdout.write("Created documents bucket") + else: + raise + + # Set public read policy + policy = { + "Version": "2012-10-17", + "Statement": [{ + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": "arn:aws:s3:::documents/*" + }] + } + client.put_bucket_policy(Bucket="documents", Policy=json.dumps(policy)) + self.stdout.write("Minio initialized successfully") \ No newline at end of file From a1b10ea2df9eb39ce5b218599db06627b63b3b26 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 23 Apr 2026 12:55:57 -0500 Subject: [PATCH 19/40] Correct comment --- README.md | 5 +---- config/settings/base.py | 2 +- documentcloud/common/environment/aws/storage.py | 1 - documentcloud/common/environment/minio/storage.py | 4 ++-- initialize_dotenvs.py | 7 ++++--- local.yml | 6 ++++-- tasks.py | 4 ++++ 7 files changed, 16 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index baffe897..7b8f77de 100644 --- a/README.md +++ b/README.md @@ -56,10 +56,7 @@ You must first have these set up and ready to go: ``` 12. Go to [Django admin for DocumentCloud](https://api.dev.documentcloud.org/admin) and add the required static [flat page](https://api.dev.documentcloud.org/admin/flatpages/flatpage/) called `/tipofday/`. It can be blank. Do not prefix the URL with `/pages/`. Specifying the `Site` as `example.com` is alright. 13. 
Create an initial Minio bucket to simulate AWS S3 locally: - - Reference your DocumentCloud `.django` file for these variables: - - Visit the `MINIO_URL` with a browser, likely at [this address](http://minio.documentcloud.org:9000), and login with the minio `MINIO_ACCESS_KEY` and `MINIO_SECRET_KEY` - - At the bottom right corner click the round plus button and then click the first circle that appears above it to "create bucket". - - Create a bucket called `documents` + - Run `inv initialize-minio` 14. Upload a document: - **Check your memory allocation on Docker is at least 7gb.** A sign that you do not have enough memory allocated is if containers are randomly failing or if your system is swapping heavily, especially when uploading documents. - The "upload" button should not be grayed out (if it is, check your user organization Verified Journalist status above) diff --git a/config/settings/base.py b/config/settings/base.py index f7eb4a0b..152e36d8 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -463,7 +463,7 @@ BASE_URL = DOCCLOUD_URL PUBLIC_ASSET_URL = env( - "PUBLIC_ASSET_URL", default="http://minio.documentcloud.org:9000/documents/" + "PUBLIC_ASSET_URL", default="https://minio.documentcloud.org/documents/" ) PRIVATE_ASSET_URL = env("PRIVATE_ASSET_URL", default=f"{DOCCLOUD_API_URL}/files/") diff --git a/documentcloud/common/environment/aws/storage.py b/documentcloud/common/environment/aws/storage.py index 2ac39097..534f1cd6 100644 --- a/documentcloud/common/environment/aws/storage.py +++ b/documentcloud/common/environment/aws/storage.py @@ -62,7 +62,6 @@ def open(self, file_name, mode="rb", content_type=None, access=None): # https://github.com/piskvorky/smart_open/blob/develop/CHANGELOG.md#500-30-mar-2021 # See migration guide here: # https://github.com/piskvorky/smart_open/blob/develop/MIGRATING_FROM_OLDER_VERSIONS.rst - # This is only called locally, serverless lambda has its own stack that uses smart_open 1.8.4 transport_params = { 
"client": self.s3_client, } diff --git a/documentcloud/common/environment/minio/storage.py b/documentcloud/common/environment/minio/storage.py index a2cc3c18..25781c67 100644 --- a/documentcloud/common/environment/minio/storage.py +++ b/documentcloud/common/environment/minio/storage.py @@ -13,8 +13,8 @@ def __init__(self, resource_kwargs=None, minio=True): if resource_kwargs is None: resource_kwargs = { "endpoint_url": env.str("MINIO_URL"), - "aws_access_key_id": env.str("MINIO_ACCESS_KEY"), - "aws_secret_access_key": env.str("MINIO_SECRET_KEY"), + "aws_access_key_id": env.str("MINIO_ROOT_USER"), + "aws_secret_access_key": env.str("MINIO_ROOT_PASSWORD"), "config": Config(signature_version="s3v4"), "region_name": "us-east-1", } diff --git a/initialize_dotenvs.py b/initialize_dotenvs.py index 829d0316..24547538 100755 --- a/initialize_dotenvs.py +++ b/initialize_dotenvs.py @@ -70,9 +70,10 @@ def random_string(n): { "name": "MinIO", "envvars": [ - ("MINIO_ACCESS_KEY", lambda: random_string(64)), - ("MINIO_SECRET_KEY", lambda: random_string(64)), - ("MINIO_URL", "http://minio.documentcloud.org:9000"), + ("MINIO_ROOT_USER", lambda: random_string(64)), + ("MINIO_ROOT_PASSWORD", lambda: random_string(64)), + ("MINIO_URL", "https://minio.documentcloud.org"), + ("AWS_CA_BUNDLE", "/etc/ssl/certs/ca-certificates.crt"), ], }, ], diff --git a/local.yml b/local.yml index 1e9a049f..05387510 100644 --- a/local.yml +++ b/local.yml @@ -50,16 +50,18 @@ services: image: redis:5.0 documentcloud_minio: - image: minio/minio:RELEASE.2019-10-12T01-39-57Z + image: minio/minio:RELEASE.2024-12-18T13-15-44Z volumes: - local_minio_data:/data ports: - "9000:9000" - command: server /data + - "9001:9001" + command: server /data --console-address ":9001" env_file: - ./.envs/.local/.django networks: default: + squarelet_default: aliases: - minio.documentcloud.org diff --git a/tasks.py b/tasks.py index e989c12e..bcc2fc9a 100755 --- a/tasks.py +++ b/tasks.py @@ -233,6 +233,10 @@ def 
download_tesseract_data(c): """Download Tesseract data files. Needed to be able to do OCR locally.""" c.run("cd config/aws/lambda; ./build.sh") +@task +def initialize_minio(c): + """Initialize Minio bucket and policies for local development""" + c.run(DJANGO_RUN.format(cmd="python manage.py initialize_minio")) @task def deploy_lambdas(c, staging=False): From 6da2c9e2e86a35595dab9bde5cb2efe378079314 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:10:03 -0500 Subject: [PATCH 20/40] Isort and black --- .../common/environment/aws/storage.py | 2 +- .../management/commands/initialize_minio.py | 25 ++++++++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/documentcloud/common/environment/aws/storage.py b/documentcloud/common/environment/aws/storage.py index 534f1cd6..11c7d7ab 100644 --- a/documentcloud/common/environment/aws/storage.py +++ b/documentcloud/common/environment/aws/storage.py @@ -65,7 +65,7 @@ def open(self, file_name, mode="rb", content_type=None, access=None): transport_params = { "client": self.s3_client, } - if "w" in mode: # Setting these kwargs only make sense in a write context + if "w" in mode: # Setting these kwargs only make sense in a write context writeable_kwargs = {} if content_type is None: # attempt to guess content type if not specified diff --git a/documentcloud/core/management/commands/initialize_minio.py b/documentcloud/core/management/commands/initialize_minio.py index fd285c89..40c510fb 100644 --- a/documentcloud/core/management/commands/initialize_minio.py +++ b/documentcloud/core/management/commands/initialize_minio.py @@ -1,9 +1,14 @@ +# Django +from django.core.management.base import BaseCommand + +# Standard Library import json + +# Third Party import boto3 import environ from botocore.client import Config from botocore.exceptions import ClientError -from django.core.management.base import BaseCommand env = environ.Env() @@ 
-30,7 +35,7 @@ def handle(self, *args, **options): self.stdout.write("Bucket already exists") except ClientError as e: error_code = e.response["Error"]["Code"] - if error_code == "404": # Bucket doesn't exist, create it + if error_code == "404": # Bucket doesn't exist, create it client.create_bucket(Bucket="documents") self.stdout.write("Created documents bucket") else: @@ -39,12 +44,14 @@ def handle(self, *args, **options): # Set public read policy policy = { "Version": "2012-10-17", - "Statement": [{ - "Effect": "Allow", - "Principal": "*", - "Action": "s3:GetObject", - "Resource": "arn:aws:s3:::documents/*" - }] + "Statement": [ + { + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": "arn:aws:s3:::documents/*", + } + ], } client.put_bucket_policy(Bucket="documents", Policy=json.dumps(policy)) - self.stdout.write("Minio initialized successfully") \ No newline at end of file + self.stdout.write("Minio initialized successfully") From 21e59ec24c279fbbbe4f5d9965459abb490add07 Mon Sep 17 00:00:00 2001 From: Chris Amico Date: Wed, 29 Apr 2026 12:41:07 -0400 Subject: [PATCH 21/40] Filter on parameters.site, with noop if key is missing --- documentcloud/addons/tests/factories.py | 13 +++++++++++++ documentcloud/addons/views.py | 10 ++++++++++ 2 files changed, 23 insertions(+) diff --git a/documentcloud/addons/tests/factories.py b/documentcloud/addons/tests/factories.py index 546c9e46..42fb920a 100644 --- a/documentcloud/addons/tests/factories.py +++ b/documentcloud/addons/tests/factories.py @@ -1,6 +1,9 @@ # Third Party import factory +# DocumentCloud +from documentcloud.addons.choices import Event + class AddOnFactory(factory.django.DjangoModelFactory): name = factory.Sequence(lambda n: f"Add-On {n}") @@ -34,6 +37,16 @@ class Meta: model = "addons.AddOnRun" +class AddOnEventFactory(factory.django.DjangoModelFactory): + addon = factory.SubFactory("documentcloud.addons.tests.factories.AddOnFactory") + user = 
factory.SubFactory("documentcloud.users.tests.factories.UserFactory") + event = Event.disabled + parameters = {} + + class Meta: + model = "addons.AddOnEvent" + + class GitHubAccountFactory(factory.django.DjangoModelFactory): user = factory.SubFactory("documentcloud.users.tests.factories.UserFactory") diff --git a/documentcloud/addons/views.py b/documentcloud/addons/views.py index ce5ad84c..e73e7e33 100644 --- a/documentcloud/addons/views.py +++ b/documentcloud/addons/views.py @@ -741,6 +741,11 @@ class Filter(django_filters.FilterSet): model=AddOn, help_text="Filter runs by a specific add-on ID." ) dismissed = django_filters.BooleanFilter(help_text="Was this run dismissed?") + site = django_filters.CharFilter( + field_name="event__parameters__site", + lookup_expr="exact", + help_text="Filter runs by the `site` value in the event's parameters.", + ) class Meta: model = AddOnRun @@ -971,6 +976,11 @@ class Filter(django_filters.FilterSet): lookup_expr="exact", help_text="Filter events by a specific add-on ID.", ) + site = django_filters.CharFilter( + field_name="parameters__site", + lookup_expr="exact", + help_text="Filter events by the `site` value in their parameters.", + ) class Meta: model = AddOnEvent From 33454494a87c4ccbc5d17bfc291d33b99d7688a1 Mon Sep 17 00:00:00 2001 From: Chris Amico Date: Wed, 29 Apr 2026 12:47:40 -0400 Subject: [PATCH 22/40] Add a partial index --- ...28_addonevent_addonevent_param_site_idx.py | 24 +++++++++++++++++++ documentcloud/addons/models.py | 10 ++++++++ documentcloud/addons/views.py | 1 - 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 documentcloud/addons/migrations/0028_addonevent_addonevent_param_site_idx.py diff --git a/documentcloud/addons/migrations/0028_addonevent_addonevent_param_site_idx.py b/documentcloud/addons/migrations/0028_addonevent_addonevent_param_site_idx.py new file mode 100644 index 00000000..c3194242 --- /dev/null +++ 
b/documentcloud/addons/migrations/0028_addonevent_addonevent_param_site_idx.py @@ -0,0 +1,24 @@ +# Generated by Django 4.2.2 on 2026-04-29 16:43 + +from django.contrib.postgres.operations import AddIndexConcurrently +from django.db import migrations, models + + +class Migration(migrations.Migration): + + atomic = False + + dependencies = [ + ("addons", "0027_visualaddon"), + ] + + operations = [ + AddIndexConcurrently( + model_name="addonevent", + index=models.Index( + models.F("parameters__site"), + condition=models.Q(("parameters__has_key", "site")), + name="addonevent_param_site_idx", + ), + ), + ] diff --git a/documentcloud/addons/models.py b/documentcloud/addons/models.py index ef7cfc5d..7a38dca7 100644 --- a/documentcloud/addons/models.py +++ b/documentcloud/addons/models.py @@ -2,6 +2,7 @@ from django.conf import settings from django.core.cache import cache from django.db import models, transaction +from django.db.models import F, Q from django.utils.translation import gettext_lazy as _ # Standard Library @@ -575,6 +576,15 @@ class AddOnEvent(models.Model): help_text=_("Timestamp of when the add-on event was last updated"), ) + class Meta: + indexes = [ + models.Index( + F("parameters__site"), + name="addonevent_param_site_idx", + condition=Q(parameters__has_key="site"), + ), + ] + def __str__(self): return f"Event: {self.addon_id} - {self.event}" diff --git a/documentcloud/addons/views.py b/documentcloud/addons/views.py index e73e7e33..50d0ba30 100644 --- a/documentcloud/addons/views.py +++ b/documentcloud/addons/views.py @@ -8,7 +8,6 @@ from django.db.models import Q from django.db.models.aggregates import Count from django.db.models.expressions import Case, Exists, F, OuterRef, Value, When -from django.db.models.fields.related import ForeignKey from django.db.models.functions.text import Concat from django.http.response import ( Http404, From 8c48310d8af3382fef93443a4748dbbc38bac1c8 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck 
<102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 30 Apr 2026 16:48:11 -0500 Subject: [PATCH 23/40] Add creds to lambda.yml --- .github/workflows/lambda.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/lambda.yml b/.github/workflows/lambda.yml index f06708b9..87937b20 100644 --- a/.github/workflows/lambda.yml +++ b/.github/workflows/lambda.yml @@ -1,18 +1,18 @@ name: Post-Deploy Lambda - on: deployment_status: - jobs: deploy-lambdas: runs-on: ubuntu-latest + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: us-east-1 steps: - uses: actions/checkout@v3 - - name: Show deployment info run: | echo "Deployment environment: ${{ github.event.deployment.environment }}" - - name: Run Lambda production deploy if: > github.event.deployment.environment == 'documentcloud-prod' && @@ -20,11 +20,10 @@ jobs: run: | echo "Deploying production lambda updates" bash config/aws/lambda/codeship_deploy_lambdas.sh prod-lambda - - name: Run Lambda staging deploy if: > github.event.deployment.environment == 'documentcloud-staging' && github.event.deployment_status.state == 'success' run: | echo "Deploying staging lambda updates" - bash config/aws/lambda/codeship_deploy_lambdas.sh staging-lambda --staging + bash config/aws/lambda/codeship_deploy_lambdas.sh staging-lambda --staging \ No newline at end of file From 3b7af4ce94e2bc52cebfc6d92c8c8a96cc057d6d Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 30 Apr 2026 17:42:10 -0500 Subject: [PATCH 24/40] Specify 3.10 in lambda file only --- .github/workflows/lambda.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/lambda.yml b/.github/workflows/lambda.yml index 87937b20..506b9157 100644 --- a/.github/workflows/lambda.yml +++ b/.github/workflows/lambda.yml @@ -10,6 +10,10 @@ jobs: AWS_DEFAULT_REGION: 
us-east-1 steps: - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: "3.10" - name: Show deployment info run: | echo "Deployment environment: ${{ github.event.deployment.environment }}" From 18a928adafe9cda41ae24b6c360837be4d0309b4 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:34:34 -0500 Subject: [PATCH 25/40] Try fixing public doc read access --- documentcloud/common/environment/aws/storage.py | 6 +++--- tasks.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/documentcloud/common/environment/aws/storage.py b/documentcloud/common/environment/aws/storage.py index 11c7d7ab..ac4aa695 100644 --- a/documentcloud/common/environment/aws/storage.py +++ b/documentcloud/common/environment/aws/storage.py @@ -77,9 +77,9 @@ def open(self, file_name, mode="rb", content_type=None, access=None): writeable_kwargs["ACL"] = ACLS[access] if writeable_kwargs: # Guard against no writeable kwargs provided - transport_params["client_kwargs"] = { - "S3.Client.create_multipart_upload": writeable_kwargs - } + # smart_open 7.x passes client_kwargs + # directly to put_object/create_multipart_upload + transport_params["client_kwargs"] = writeable_kwargs return smart_open.open( f"s3://{file_name}", mode, transport_params=transport_params ) diff --git a/tasks.py b/tasks.py index bcc2fc9a..c9aa4e3b 100755 --- a/tasks.py +++ b/tasks.py @@ -120,9 +120,9 @@ def format(c): cmd="black documentcloud --exclude migrations && " "black config/urls.py && " "black config/settings && " - "isort -rc documentcloud && " - "isort -rc config/urls.py && " - "isort -rc config/settings" + "isort documentcloud && " + "isort config/urls.py && " + "isort config/settings" ) ) From 3d7281d69d6a05c09b755e1af066d6d2a4fff78c Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 30 Apr 2026 19:09:38 
-0500 Subject: [PATCH 26/40] One more try --- documentcloud/common/environment/aws/storage.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/documentcloud/common/environment/aws/storage.py b/documentcloud/common/environment/aws/storage.py index ac4aa695..d4f5421b 100644 --- a/documentcloud/common/environment/aws/storage.py +++ b/documentcloud/common/environment/aws/storage.py @@ -77,9 +77,10 @@ def open(self, file_name, mode="rb", content_type=None, access=None): writeable_kwargs["ACL"] = ACLS[access] if writeable_kwargs: # Guard against no writeable kwargs provided - # smart_open 7.x passes client_kwargs - # directly to put_object/create_multipart_upload - transport_params["client_kwargs"] = writeable_kwargs + transport_params["client_kwargs"] = { + "S3.Client.put_object": writeable_kwargs, + "S3.Client.create_multipart_upload": writeable_kwargs, + } return smart_open.open( f"s3://{file_name}", mode, transport_params=transport_params ) From ea70132d15cba3ea5fc4fb73391cb6d6880361a4 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 30 Apr 2026 20:17:37 -0500 Subject: [PATCH 27/40] Update lambda requirements to match what local sees --- config/aws/lambda/cloud-requirements.txt | 4 ++-- config/aws/lambda/info-and-image-requirements.txt | 8 ++++---- .../processing/document_conversion/requirements.txt | 2 +- .../documents/processing/info_and_image/requirements.txt | 8 ++++---- documentcloud/documents/processing/ocr/requirements.txt | 9 ++++----- .../documents/processing/utils/requirements.txt | 2 +- 6 files changed, 16 insertions(+), 17 deletions(-) diff --git a/config/aws/lambda/cloud-requirements.txt b/config/aws/lambda/cloud-requirements.txt index 8ac82d03..f5d6cd9c 100644 --- a/config/aws/lambda/cloud-requirements.txt +++ b/config/aws/lambda/cloud-requirements.txt @@ -1,2 +1,2 @@ -boto3==1.10.14 -smart-open==1.8.4 +boto3==1.40.61 +smart-open==7.5.1 diff --git 
a/config/aws/lambda/info-and-image-requirements.txt b/config/aws/lambda/info-and-image-requirements.txt index b0d872dc..7307fd2e 100644 --- a/config/aws/lambda/info-and-image-requirements.txt +++ b/config/aws/lambda/info-and-image-requirements.txt @@ -1,4 +1,4 @@ -boto3==1.21.21 -smart-open==1.8.4 -pdfplumber==0.5.28 -pikepdf==5.4.0 +boto3==1.40.61 +smart-open==7.5.1 +pdfplumber==0.11.9 +pikepdf==10.5.1 diff --git a/documentcloud/documents/processing/document_conversion/requirements.txt b/documentcloud/documents/processing/document_conversion/requirements.txt index 8a76067d..f0c8dcb3 100644 --- a/documentcloud/documents/processing/document_conversion/requirements.txt +++ b/documentcloud/documents/processing/document_conversion/requirements.txt @@ -4,5 +4,5 @@ django-environ==0.4.5 furl==2.1.0 pebble==4.5.0 redis==3.4.1 -requests==2.22.0 +requests==2.33.0 sentry-sdk==0.14.0 diff --git a/documentcloud/documents/processing/info_and_image/requirements.txt b/documentcloud/documents/processing/info_and_image/requirements.txt index 571e6581..26581327 100755 --- a/documentcloud/documents/processing/info_and_image/requirements.txt +++ b/documentcloud/documents/processing/info_and_image/requirements.txt @@ -1,12 +1,12 @@ -r cloud-requirements.txt -Pillow==9.2.0 -aioboto3==9.6.0 +Pillow==12.1.1 +aioboto3==15.5.0 django-environ==0.4.5 furl==2.1.0 -listcrunch==0.1.0 +listcrunch==1.0.0 pebble==4.5.0 redis==3.4.1 -requests==2.22.0 +requests==2.33.0 sentry-sdk==0.14.0 pymupdf==1.25.3 diff --git a/documentcloud/documents/processing/ocr/requirements.txt b/documentcloud/documents/processing/ocr/requirements.txt index ab5f786c..e255247a 100755 --- a/documentcloud/documents/processing/ocr/requirements.txt +++ b/documentcloud/documents/processing/ocr/requirements.txt @@ -1,12 +1,11 @@ -boto3==1.26.115 -smart-open==1.8.4 - -Pillow==9.2.0 +boto3==1.40.61 +smart-open==7.5.1 +Pillow==12.1.1 cpuprofile==1.0.1 django-environ==0.4.5 furl==2.1.0 pebble==4.5.0 redis==3.4.1 -requests==2.22.0 
+requests==2.33.0 sentry-sdk==0.14.0 pymupdf==1.25.3 diff --git a/documentcloud/documents/processing/utils/requirements.txt b/documentcloud/documents/processing/utils/requirements.txt index 8a76067d..f0c8dcb3 100644 --- a/documentcloud/documents/processing/utils/requirements.txt +++ b/documentcloud/documents/processing/utils/requirements.txt @@ -4,5 +4,5 @@ django-environ==0.4.5 furl==2.1.0 pebble==4.5.0 redis==3.4.1 -requests==2.22.0 +requests==2.33.0 sentry-sdk==0.14.0 From b5b689d2068753e4ba15f2fa5eacf9495140ec71 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 30 Apr 2026 20:31:11 -0500 Subject: [PATCH 28/40] Update lambda to Python 3.12 --- .github/workflows/lambda.yml | 2 +- config/aws/lambda/template_params.yaml | 36 +++++++++++++------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/lambda.yml b/.github/workflows/lambda.yml index 506b9157..16ec478e 100644 --- a/.github/workflows/lambda.yml +++ b/.github/workflows/lambda.yml @@ -13,7 +13,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v4 with: - python-version: "3.10" + python-version: "3.12" - name: Show deployment info run: | echo "Deployment environment: ${{ github.event.deployment.environment }}" diff --git a/config/aws/lambda/template_params.yaml b/config/aws/lambda/template_params.yaml index 197738f5..8e03318d 100644 --- a/config/aws/lambda/template_params.yaml +++ b/config/aws/lambda/template_params.yaml @@ -56,7 +56,7 @@ Resources: ProcessDocFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.process_doc CodeUri: ./awsbin/utils # Trigger function via HTTP @@ -74,7 +74,7 @@ Resources: GetProgressFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.get_progress CodeUri: ./awsbin/utils # Trigger function via HTTP @@ -92,7 +92,7 @@ Resources: ImportDocumentsFunction: 
Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.import_documents CodeUri: ./awsbin/utils # Trigger function via HTTP @@ -110,7 +110,7 @@ Resources: SidekickFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.sidekick CodeUri: ./awsbin/utils # Trigger function via HTTP @@ -128,7 +128,7 @@ Resources: ProcessPdfFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.process_pdf CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -145,7 +145,7 @@ Resources: ProcessPageCacheFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.process_page_cache CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -162,7 +162,7 @@ Resources: ExtractImageFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.extract_image CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -179,7 +179,7 @@ Resources: RedactDocFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.redact_doc CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -196,7 +196,7 @@ Resources: RunTesseractFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.run_tesseract CodeUri: ./awsbin/ocr Environment: @@ -215,7 +215,7 @@ Resources: ConvertDocFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.run_document_conversion CodeUri: ./awsbin/document_conversion # Trigger function via SNS @@ -232,7 +232,7 @@ Resources: AssembleTextFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.assemble_page_text CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -249,7 +249,7 @@ 
Resources: ExtractTextFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.extract_text_position CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -266,7 +266,7 @@ Resources: StartImportFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.start_import CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -283,7 +283,7 @@ Resources: ImportDocumentFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.import_document CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -300,7 +300,7 @@ Resources: FinishImportFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.finish_import CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -317,7 +317,7 @@ Resources: ModifyDocFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.modify_doc CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -334,7 +334,7 @@ Resources: SidekickPreprocessFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.preprocess # https://github.com/model-zoo/scikit-learn-lambda/blob/master/layers.csv Layers: ["arn:aws:lambda:us-east-1:446751924810:layer:python-3-7-scikit-learn-0-23-1:2"] @@ -353,7 +353,7 @@ Resources: RetryErrorsFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.retry_errors CodeUri: ./awsbin/utils # Trigger function via SNS From 80a0d060b6126c7d8b76fa5406c435899caf329c Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 30 Apr 2026 21:25:23 -0500 Subject: [PATCH 29/40] Upgrade django-environ, which was pulling in distutils which got dropped in 3.12 in hopes of fixing lambda 
--- .../documents/processing/document_conversion/requirements.txt | 2 +- .../documents/processing/info_and_image/requirements.txt | 2 +- documentcloud/documents/processing/ocr/requirements.txt | 2 +- documentcloud/documents/processing/utils/requirements.txt | 2 +- requirements/base.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/documentcloud/documents/processing/document_conversion/requirements.txt b/documentcloud/documents/processing/document_conversion/requirements.txt index f0c8dcb3..22686ebb 100644 --- a/documentcloud/documents/processing/document_conversion/requirements.txt +++ b/documentcloud/documents/processing/document_conversion/requirements.txt @@ -1,6 +1,6 @@ -r cloud-requirements.txt -django-environ==0.4.5 +django-environ==0.13.0 furl==2.1.0 pebble==4.5.0 redis==3.4.1 diff --git a/documentcloud/documents/processing/info_and_image/requirements.txt b/documentcloud/documents/processing/info_and_image/requirements.txt index 26581327..e9b18d03 100755 --- a/documentcloud/documents/processing/info_and_image/requirements.txt +++ b/documentcloud/documents/processing/info_and_image/requirements.txt @@ -2,7 +2,7 @@ Pillow==12.1.1 aioboto3==15.5.0 -django-environ==0.4.5 +django-environ==0.13.0 furl==2.1.0 listcrunch==1.0.0 pebble==4.5.0 diff --git a/documentcloud/documents/processing/ocr/requirements.txt b/documentcloud/documents/processing/ocr/requirements.txt index e255247a..b894f8b2 100755 --- a/documentcloud/documents/processing/ocr/requirements.txt +++ b/documentcloud/documents/processing/ocr/requirements.txt @@ -2,7 +2,7 @@ boto3==1.40.61 smart-open==7.5.1 Pillow==12.1.1 cpuprofile==1.0.1 -django-environ==0.4.5 +django-environ==0.13.0 furl==2.1.0 pebble==4.5.0 redis==3.4.1 diff --git a/documentcloud/documents/processing/utils/requirements.txt b/documentcloud/documents/processing/utils/requirements.txt index f0c8dcb3..22686ebb 100644 --- a/documentcloud/documents/processing/utils/requirements.txt +++ 
b/documentcloud/documents/processing/utils/requirements.txt @@ -1,6 +1,6 @@ -r cloud-requirements.txt -django-environ==0.4.5 +django-environ==0.13.0 furl==2.1.0 pebble==4.5.0 redis==3.4.1 diff --git a/requirements/base.txt b/requirements/base.txt index f599d526..2660a3cb 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -133,7 +133,7 @@ django-cprofile-middleware==1.0.5 # via -r requirements/base.in django-debug-toolbar==6.2.0 # via -r requirements/base.in -django-environ==0.4.5 +django-environ==0.13.0 # via -r requirements/base.in django-extensions==3.2.3 # via -r requirements/base.in From 8ccb1fd1b001fde65f66c83d703c604e3594ce1b Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 30 Apr 2026 21:47:19 -0500 Subject: [PATCH 30/40] Update sentry-sdk and pin setuptools to stop chasing what is causing the distutils to be missing --- config/aws/lambda/cloud-requirements.txt | 1 + .../documents/processing/document_conversion/requirements.txt | 2 +- .../documents/processing/info_and_image/requirements.txt | 2 +- documentcloud/documents/processing/ocr/requirements.txt | 2 +- documentcloud/documents/processing/utils/requirements.txt | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/config/aws/lambda/cloud-requirements.txt b/config/aws/lambda/cloud-requirements.txt index f5d6cd9c..f6d55e7b 100644 --- a/config/aws/lambda/cloud-requirements.txt +++ b/config/aws/lambda/cloud-requirements.txt @@ -1,2 +1,3 @@ boto3==1.40.61 smart-open==7.5.1 +setuptools<81 diff --git a/documentcloud/documents/processing/document_conversion/requirements.txt b/documentcloud/documents/processing/document_conversion/requirements.txt index 22686ebb..37009855 100644 --- a/documentcloud/documents/processing/document_conversion/requirements.txt +++ b/documentcloud/documents/processing/document_conversion/requirements.txt @@ -5,4 +5,4 @@ furl==2.1.0 pebble==4.5.0 redis==3.4.1 requests==2.33.0 
-sentry-sdk==0.14.0 +sentry-sdk==2.57.0 diff --git a/documentcloud/documents/processing/info_and_image/requirements.txt b/documentcloud/documents/processing/info_and_image/requirements.txt index e9b18d03..c544cb93 100755 --- a/documentcloud/documents/processing/info_and_image/requirements.txt +++ b/documentcloud/documents/processing/info_and_image/requirements.txt @@ -8,5 +8,5 @@ listcrunch==1.0.0 pebble==4.5.0 redis==3.4.1 requests==2.33.0 -sentry-sdk==0.14.0 +sentry-sdk==2.57.0 pymupdf==1.25.3 diff --git a/documentcloud/documents/processing/ocr/requirements.txt b/documentcloud/documents/processing/ocr/requirements.txt index b894f8b2..dc66da21 100755 --- a/documentcloud/documents/processing/ocr/requirements.txt +++ b/documentcloud/documents/processing/ocr/requirements.txt @@ -7,5 +7,5 @@ furl==2.1.0 pebble==4.5.0 redis==3.4.1 requests==2.33.0 -sentry-sdk==0.14.0 +sentry-sdk==2.57.0 pymupdf==1.25.3 diff --git a/documentcloud/documents/processing/utils/requirements.txt b/documentcloud/documents/processing/utils/requirements.txt index 22686ebb..37009855 100644 --- a/documentcloud/documents/processing/utils/requirements.txt +++ b/documentcloud/documents/processing/utils/requirements.txt @@ -5,4 +5,4 @@ furl==2.1.0 pebble==4.5.0 redis==3.4.1 requests==2.33.0 -sentry-sdk==0.14.0 +sentry-sdk==2.57.0 From 4758d14a841da478c4d6354b30faaa9ca864719e Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 30 Apr 2026 22:01:17 -0500 Subject: [PATCH 31/40] Add setuptools<81 to info and image --- config/aws/lambda/info-and-image-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/config/aws/lambda/info-and-image-requirements.txt b/config/aws/lambda/info-and-image-requirements.txt index 7307fd2e..d04b85bf 100644 --- a/config/aws/lambda/info-and-image-requirements.txt +++ b/config/aws/lambda/info-and-image-requirements.txt @@ -2,3 +2,4 @@ boto3==1.40.61 smart-open==7.5.1 pdfplumber==0.11.9 pikepdf==10.5.1 
+setuptools<81 From 1b70c1408baa8d0d3a1f5fba3631aade8f544140 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Wed, 27 May 2026 19:40:29 -0500 Subject: [PATCH 32/40] Fix test_destroy mock patch and assertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- documentcloud/addons/tests/test_views.py | 107 ++++++++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/documentcloud/addons/tests/test_views.py b/documentcloud/addons/tests/test_views.py index bf9f86d3..1062d242 100644 --- a/documentcloud/addons/tests/test_views.py +++ b/documentcloud/addons/tests/test_views.py @@ -11,7 +11,11 @@ # DocumentCloud from documentcloud.addons.models import AddOn, AddOnRun from documentcloud.addons.serializers import AddOnRunSerializer, AddOnSerializer -from documentcloud.addons.tests.factories import AddOnFactory, AddOnRunFactory +from documentcloud.addons.tests.factories import ( + AddOnEventFactory, + AddOnFactory, + AddOnRunFactory, +) from documentcloud.documents.choices import Access from documentcloud.users.tests.factories import UserFactory @@ -268,3 +272,104 @@ def test_destroy(self, client, mocker): response = client.delete(f"/api/addon_runs/{run.uuid}/") assert response.status_code == status.HTTP_204_NO_CONTENT cancel.assert_called_once() + + def test_filter_site(self, client): + """Filter runs by event parameters.site""" + user = UserFactory() + site = "https://www.example.com" + matching_event = AddOnEventFactory( + user=user, parameters={"site": site, "selector": "*"} + ) + other_event = AddOnEventFactory( + user=user, parameters={"site": "https://www.other.com"} + ) + no_site_event = AddOnEventFactory(user=user, parameters={"selector": "*"}) + matching_run = AddOnRunFactory(user=user, event=matching_event) + AddOnRunFactory(user=user, event=other_event) +
AddOnRunFactory(user=user, event=no_site_event) + AddOnRunFactory(user=user, event=None) + client.force_authenticate(user=user) + response = client.get("/api/addon_runs/", {"site": site}) + assert response.status_code == status.HTTP_200_OK + uuids = [r["uuid"] for r in response.json()["results"]] + assert uuids == [str(matching_run.uuid)] + + def test_filter_site_absent_is_noop(self, client): + """Omitting the site filter returns all viewable runs""" + user = UserFactory() + with_site = AddOnEventFactory( + user=user, parameters={"site": "https://www.example.com"} + ) + without_site = AddOnEventFactory(user=user, parameters={}) + AddOnRunFactory(user=user, event=with_site) + AddOnRunFactory(user=user, event=without_site) + AddOnRunFactory(user=user, event=None) + client.force_authenticate(user=user) + response = client.get("/api/addon_runs/") + assert response.status_code == status.HTTP_200_OK + assert len(response.json()["results"]) == 3 + + +@pytest.mark.django_db() +class TestAddOnEventAPI: + def test_filter_site(self, client): + """Filter events by parameters.site""" + user = UserFactory() + site = "https://www.example.com" + matching = AddOnEventFactory( + user=user, parameters={"site": site, "selector": "*"} + ) + AddOnEventFactory( + user=user, parameters={"site": "https://www.other.com", "selector": "*"} + ) + AddOnEventFactory(user=user, parameters={"selector": "*"}) + client.force_authenticate(user=user) + response = client.get("/api/addon_events/", {"site": site}) + assert response.status_code == status.HTTP_200_OK + ids = [r["id"] for r in response.json()["results"]] + assert ids == [matching.pk] + + def test_filter_site_absent_is_noop(self, client): + """Omitting the site filter returns all viewable events""" + user = UserFactory() + AddOnEventFactory(user=user, parameters={"site": "https://www.example.com"}) + AddOnEventFactory(user=user, parameters={}) + client.force_authenticate(user=user) + response = client.get("/api/addon_events/") + assert 
response.status_code == status.HTTP_200_OK + assert len(response.json()["results"]) == 2 + + def test_filter_site_no_match(self, client): + """A site filter that matches nothing returns an empty list""" + user = UserFactory() + AddOnEventFactory(user=user, parameters={"site": "https://www.example.com"}) + AddOnEventFactory(user=user, parameters={}) + client.force_authenticate(user=user) + response = client.get( + "/api/addon_events/", {"site": "https://nope.example.com"} + ) + assert response.status_code == status.HTTP_200_OK + assert response.json()["results"] == [] + + def test_filter_message(self, client): + """Filter runs by message""" + user = UserFactory() + matching_run = AddOnRunFactory(user=user, message="data changed") + AddOnRunFactory(user=user, message="no changes detected") + AddOnRunFactory(user=user, message="") + client.force_authenticate(user=user) + response = client.get("/api/addon_runs/", {"message": "data changed"}) + assert response.status_code == status.HTTP_200_OK + uuids = [r["uuid"] for r in response.json()["results"]] + assert uuids == [str(matching_run.uuid)] + + def test_filter_message_absent_is_noop(self, client): + """Omitting the message filter returns all viewable runs""" + user = UserFactory() + AddOnRunFactory(user=user, message="data changed") + AddOnRunFactory(user=user, message="no changes detected") + AddOnRunFactory(user=user, message="") + client.force_authenticate(user=user) + response = client.get("/api/addon_runs/") + assert response.status_code == status.HTTP_200_OK + assert len(response.json()["results"]) == 3 From a9d018f2a59b8f0d43f5b99ae62a0427973c8a7e Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Wed, 27 May 2026 19:45:57 -0500 Subject: [PATCH 33/40] Restore message filter and labels removed during rebase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding:
8bit --- documentcloud/addons/views.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/documentcloud/addons/views.py b/documentcloud/addons/views.py index 50d0ba30..6a8a6b7e 100644 --- a/documentcloud/addons/views.py +++ b/documentcloud/addons/views.py @@ -743,8 +743,15 @@ class Filter(django_filters.FilterSet): site = django_filters.CharFilter( field_name="event__parameters__site", lookup_expr="exact", + label="Site", help_text="Filter runs by the `site` value in the event's parameters.", ) + message = django_filters.CharFilter( + field_name="message", + lookup_expr="exact", + label="Message", + help_text="Filter runs by their progress message.", + ) class Meta: model = AddOnRun @@ -978,6 +985,7 @@ class Filter(django_filters.FilterSet): site = django_filters.CharFilter( field_name="parameters__site", lookup_expr="exact", + label="Site", help_text="Filter events by the `site` value in their parameters.", ) From ac30f136045af877497776352d6f7bd2078d9e59 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Wed, 27 May 2026 19:48:35 -0500 Subject: [PATCH 34/40] Add back data --- .../addons/migrations/0029_addonrun_data.py | 24 +++++++++++++++++++ documentcloud/addons/models.py | 8 +++++++ documentcloud/addons/serializers.py | 3 +++ 3 files changed, 35 insertions(+) create mode 100644 documentcloud/addons/migrations/0029_addonrun_data.py diff --git a/documentcloud/addons/migrations/0029_addonrun_data.py b/documentcloud/addons/migrations/0029_addonrun_data.py new file mode 100644 index 00000000..d346a83d --- /dev/null +++ b/documentcloud/addons/migrations/0029_addonrun_data.py @@ -0,0 +1,24 @@ +# Generated by Django 4.2.2 on 2026-05-15 20:25 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("addons", "0028_addonevent_addonevent_param_site_idx"), + ] + + operations = [ + migrations.AddField( + model_name="addonrun", + name="data", + 
field=models.JSONField( + blank=True, + default=dict, + help_text="Field to store arbitrary per-run data", + null=True, + verbose_name="data", + ), + ), + ] diff --git a/documentcloud/addons/models.py b/documentcloud/addons/models.py index 7a38dca7..5fc9a0ed 100644 --- a/documentcloud/addons/models.py +++ b/documentcloud/addons/models.py @@ -372,6 +372,14 @@ class AddOnRun(models.Model): help_text=_("Timestamp of when the add-on run was last updated"), ) + data = models.JSONField( + _("data"), + default=dict, + help_text=_("Field to store arbitrary per-run data"), + blank=True, + null=True, + ) + def __str__(self): return f"Run: {self.addon_id} - {self.created_at}" diff --git a/documentcloud/addons/serializers.py b/documentcloud/addons/serializers.py index 5ccfa0e1..cd21d75d 100644 --- a/documentcloud/addons/serializers.py +++ b/documentcloud/addons/serializers.py @@ -91,6 +91,7 @@ class Meta: "parameters", "created_at", "updated_at", + "data", "active_w", "active", "default", @@ -155,6 +156,7 @@ class Meta: "credits_spent", "created_at", "updated_at", + "data", ] extra_kwargs = { "uuid": {"read_only": True}, @@ -244,6 +246,7 @@ class Meta: "scratch", "created_at", "updated_at", + "data", ] extra_kwargs = { "addon": {"queryset": AddOn.objects.none()}, From cdf6fc228bfaa4106cd270cc22fcfb93fd1889f6 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Wed, 27 May 2026 20:11:21 -0500 Subject: [PATCH 35/40] Remove data field from AddOnSerializer only --- documentcloud/addons/serializers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/documentcloud/addons/serializers.py b/documentcloud/addons/serializers.py index cd21d75d..2a355946 100644 --- a/documentcloud/addons/serializers.py +++ b/documentcloud/addons/serializers.py @@ -91,7 +91,6 @@ class Meta: "parameters", "created_at", "updated_at", - "data", "active_w", "active", "default", @@ -246,7 +245,6 @@ class Meta: "scratch", "created_at", "updated_at", - 
"data", ] extra_kwargs = { "addon": {"queryset": AddOn.objects.none()}, From bb5396f1264933931da42d86c46bfcc5869646ac Mon Sep 17 00:00:00 2001 From: Sanjin <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 28 May 2026 14:56:02 +0000 Subject: [PATCH 36/40] Update .github/workflows/lambda.yml Co-authored-by: Chris Amico --- .github/workflows/lambda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lambda.yml b/.github/workflows/lambda.yml index 16ec478e..2b36d83f 100644 --- a/.github/workflows/lambda.yml +++ b/.github/workflows/lambda.yml @@ -10,7 +10,7 @@ jobs: AWS_DEFAULT_REGION: us-east-1 steps: - uses: actions/checkout@v3 - - name: Set up Python 3.10 + - name: Set up Python 3.12 uses: actions/setup-python@v4 with: python-version: "3.12" From 6b1d41853ab4846aa21e417f794553fb8f74f233 Mon Sep 17 00:00:00 2001 From: Sanjin <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 28 May 2026 15:24:02 +0000 Subject: [PATCH 37/40] Update config/settings/base.py Co-authored-by: Chris Amico --- config/settings/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/config/settings/base.py b/config/settings/base.py index 152e36d8..bcfab1f2 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -496,7 +496,6 @@ PROGRESS_URL = env("PROGRESS_URL", default="") IMPORT_URL = env("IMPORT_URL", default="") PROGRESS_TIMEOUT = env.int("PROGRESS_TIMEOUT", default=1) -# SIDEKICK_PROCESSING_URL = env("SIDEKICK_PROCESSING_URL", default="") # Auth LOGIN_URL = "/accounts/login/squarelet" From 83a9e8565c3776b05bb14a7b7ec4f785eead35ae Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 28 May 2026 10:30:25 -0500 Subject: [PATCH 38/40] Remove collect fast comments --- config/settings/production.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/config/settings/production.py b/config/settings/production.py index 92ba3196..84332a60 100644 --- 
a/config/settings/production.py +++ b/config/settings/production.py @@ -148,12 +148,6 @@ class StaticRootS3Boto3Storage(S3Boto3Storage): # https://django-compressor.readthedocs.io/en/latest/settings/#django.conf.settings.COMPRESS_URL COMPRESS_URL = STATIC_URL -# Collectfast Remove later -# ------------------------------------------------------------------------------ -# https://github.com/antonagestam/collectfast#installation -# INSTALLED_APPS = ["collectfast"] + INSTALLED_APPS # noqa F405 -# COLLECTFAST_STRATEGY = "collectfast.strategies.boto3.Boto3Strategy" - # LOGGING # ------------------------------------------------------------------------------ # https://docs.djangoproject.com/en/dev/ref/settings/#logging From 4cba0442dc3cfd0396fd2aa597090cf4d9328b62 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Thu, 28 May 2026 12:49:18 -0500 Subject: [PATCH 39/40] Remove duplicate data from rebase --- documentcloud/addons/models.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/documentcloud/addons/models.py b/documentcloud/addons/models.py index 22cc1d4d..ac640364 100644 --- a/documentcloud/addons/models.py +++ b/documentcloud/addons/models.py @@ -379,14 +379,6 @@ class AddOnRun(models.Model): null=True, ) - data = models.JSONField( - _("data"), - default=dict, - help_text=_("Field to store arbitrary per-run data"), - blank=True, - null=True, - ) - def __str__(self): return f"Run: {self.addon_id} - {self.created_at}" From 700793dd084266e91e9c34c5b534f46d7ed24638 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Fri, 29 May 2026 11:48:46 -0500 Subject: [PATCH 40/40] Add LFS support to pull in LibreOffice correctly --- .github/workflows/lambda.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lambda.yml b/.github/workflows/lambda.yml index 2b36d83f..3019aba9 100644 --- a/.github/workflows/lambda.yml 
+++ b/.github/workflows/lambda.yml @@ -10,6 +10,14 @@ jobs: AWS_DEFAULT_REGION: us-east-1 steps: - uses: actions/checkout@v3 + with: + lfs: true + - name: Install Git LFS and pull files + run: | + sudo apt-get install -y git-lfs + git lfs install + git lfs fetch --all + git lfs checkout - name: Set up Python 3.12 uses: actions/setup-python@v4 with: @@ -30,4 +38,4 @@ jobs: github.event.deployment_status.state == 'success' run: | echo "Deploying staging lambda updates" - bash config/aws/lambda/codeship_deploy_lambdas.sh staging-lambda --staging \ No newline at end of file + bash config/aws/lambda/codeship_deploy_lambdas.sh staging-lambda --staging