Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
c43ea53
First pass at upgrade to Django 5.2
duckduckgrayduck Apr 8, 2026
a65be71
Getting dependencies up to date
duckduckgrayduck Apr 9, 2026
e30b094
Add back opencv
duckduckgrayduck Apr 14, 2026
44b6655
Remove rootCA.pem
duckduckgrayduck Apr 14, 2026
bb5c7c6
Get tests working
duckduckgrayduck Apr 14, 2026
94e4165
Format
duckduckgrayduck Apr 14, 2026
d090b35
Pylint silence
duckduckgrayduck Apr 14, 2026
53323de
Bump python in workflow
duckduckgrayduck Apr 14, 2026
9e0aeec
Pylint complains about sorting isort won't fix, so let's ignore it
duckduckgrayduck Apr 15, 2026
6dadb2c
almost there
duckduckgrayduck Apr 15, 2026
ac6a5bc
Remove get-pip from workflow, outdated
duckduckgrayduck Apr 15, 2026
a232f41
Resolve checks
duckduckgrayduck Apr 20, 2026
3c4570e
More conflict markers
duckduckgrayduck Apr 20, 2026
48e3710
Remove collectfast
duckduckgrayduck Apr 21, 2026
d25f29a
Try checking if table already exists and no-op if it does for addons
duckduckgrayduck Apr 21, 2026
7262d44
Fix language.get_choice
duckduckgrayduck Apr 21, 2026
2b8652c
Fix package upgrade regressions: smart_open 5.x API migration and Pil…
duckduckgrayduck Apr 23, 2026
a66d910
Fix Minio locally (requires) squarelet #621
duckduckgrayduck Apr 23, 2026
a1b10ea
Correct comment
duckduckgrayduck Apr 23, 2026
6da2c9e
Isort and black
duckduckgrayduck Apr 23, 2026
21e59ec
Filter on parameters.site, with noop if key is missing
eyeseast Apr 29, 2026
3345449
Add a partial index
eyeseast Apr 29, 2026
8c48310
Add creds to lambda.yml
duckduckgrayduck Apr 30, 2026
3b7af4c
Specify 3.10 in lambda file only
duckduckgrayduck Apr 30, 2026
18a928a
Try fixing public doc read access
duckduckgrayduck Apr 30, 2026
3d7281d
One more try
duckduckgrayduck May 1, 2026
ea70132
Update lambda requirements to match what local sees
duckduckgrayduck May 1, 2026
b5b689d
Update lambda to Python 3.12
duckduckgrayduck May 1, 2026
80a0d06
Upgrade django-environ, which was pulling in distutils which got drop…
duckduckgrayduck May 1, 2026
8ccb1fd
Update sentry-sdk and pin setuptools to stop chasing what is causing …
duckduckgrayduck May 1, 2026
4758d14
Add setuptools<81 to info and image
duckduckgrayduck May 1, 2026
1b70c14
Fix test_destroy mock patch and assertion
duckduckgrayduck May 28, 2026
a9d018f
Restore message filter and labels removed during rebase
duckduckgrayduck May 28, 2026
ac30f13
Add back data
duckduckgrayduck May 28, 2026
9fc205a
Merge branch 'master' into django_5_upgrade
duckduckgrayduck May 28, 2026
cdf6fc2
Remove data field from AddOnSerializer only
duckduckgrayduck May 28, 2026
bb5396f
Update .github/workflows/lambda.yml
duckduckgrayduck May 28, 2026
6b1d418
Update config/settings/base.py
duckduckgrayduck May 28, 2026
83a9e85
Remove collect fast comments
duckduckgrayduck May 28, 2026
4cba044
Remove duplicate data from rebase
duckduckgrayduck May 28, 2026
700793d
Add LFS support to pull in LibreOffice correctly
duckduckgrayduck May 29, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions .github/workflows/lambda.yml
Original file line number Diff line number Diff line change
@@ -1,26 +1,37 @@
name: Post-Deploy Lambda

on:
deployment_status:

jobs:
deploy-lambdas:
runs-on: ubuntu-latest
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-1
steps:
- uses: actions/checkout@v3

with:
lfs: true
- name: Install Git LFS and pull files
run: |
sudo apt-get install -y git-lfs
git lfs install
git lfs fetch --all
git lfs checkout
- name: Set up Python 3.12
uses: actions/setup-python@v4
with:
python-version: "3.12"
- name: Show deployment info
run: |
echo "Deployment environment: ${{ github.event.deployment.environment }}"

- name: Run Lambda production deploy
if: >
github.event.deployment.environment == 'documentcloud-prod' &&
github.event.deployment_status.state == 'success'
run: |
echo "Deploying production lambda updates"
bash config/aws/lambda/codeship_deploy_lambdas.sh prod-lambda

- name: Run Lambda staging deploy
if: >
github.event.deployment.environment == 'documentcloud-staging' &&
Expand Down
39 changes: 4 additions & 35 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
name: CI

on:
workflow_dispatch:
pull_request:
push:
branches:
- master
- staging

env:
PYENV_VERSION: '3.10'
PYENV_VERSION: '3.12'
DJANGO_SECRET_KEY: ${{secrets.SECRET_KEY}}
DJANGO_SETTINGS_MODULE: config.settings.test
DATABASE_URL: postgres://${PGUSER}:${PGPASSWORD}@127.0.0.1:5432/test
Expand Down Expand Up @@ -38,15 +36,10 @@ env:
DOCUMENT_BUCKET: ${{secrets.DOCUMENT_BUCKET}}
REDIS_PROCESSING_PASSWORD: ""
SAM_CLI_TELEMETRY: 0





jobs:
check:
runs-on: ubuntu-latest

services:
redis:
image: redis
Expand All @@ -63,30 +56,19 @@ jobs:
submodules: recursive
- uses: actions/setup-python@v5
with:
python-version: "3.10"
python-version: "3.12"
cache: "pip" # caching pip dependencies

- name: Install pip
run: |
wget https://bootstrap.pypa.io/pip/3.6/get-pip.py
python3 get-pip.py

- name: Install
run: |
pip install -r requirements/local.txt

- name: Lint
run: pylint documentcloud

- name: Isort
run: isort --check-only -rc documentcloud

run: isort --check-only --diff documentcloud
- name: Formatting
run: black --check documentcloud --exclude migrations

test:
runs-on: ubuntu-latest

services:
redis:
image: redis
Expand All @@ -97,7 +79,6 @@ jobs:
--health-interval 10s
--health-timeout 5s
--health-retries 5

postgres:
image: postgres
env:
Expand All @@ -113,33 +94,21 @@ jobs:
ports:
# Maps tcp port 5432 on service container to the host
- 5432:5432

steps:

- name: Checkout
uses: actions/checkout@v4

- name: Install Python
uses: actions/setup-python@v5
with:
python-version: "3.10"
python-version: "3.12"
cache: "pip" # caching pip dependencies

- name: Install pip
run: |
wget https://bootstrap.pypa.io/pip/3.6/get-pip.py
python3 get-pip.py

- name: Install requirements
run: |
pip install -r requirements/local.txt


- name: Test
run: pytest documentcloud -m "not solr"
env:
# use the credentials for the service container
PG_USER: test
PG_PASSWORD: ${{ secrets.PG_PASSWORD }}
DATABASE_URL: postgres://test:postgres@127.0.0.1:5432/test

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,4 @@ CLAUDE.md
.claude

rootCA.pem
tests.txt
7 changes: 3 additions & 4 deletions .pylintrc
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
[MASTER]
# https://stackoverflow.com/a/39207275/10952222
init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.join(os.path.dirname(find_pylintrc()), 'documentcloud'))"
load-plugins=pylint_django, pylint_celery
init-hook="import sys, os; sys.path.insert(0, '/app'); os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.local')"
load-plugins=pylint_django
ignore=migrations

[FORMAT]
Expand All @@ -11,7 +10,7 @@ max-args=6

[MESSAGES CONTROL]
enable=useless-suppression
disable=missing-docstring,too-many-ancestors,too-few-public-methods,no-else-return,no-member,attribute-defined-outside-init,similarities,import-outside-toplevel,cyclic-import,raise-missing-from,django-not-configured
disable=missing-docstring,too-many-ancestors,too-few-public-methods,no-else-return,no-member,attribute-defined-outside-init,similarities,import-outside-toplevel,cyclic-import,raise-missing-from,django-not-configured,wrong-import-order,wrong-import-position

[TYPECHECK]
generated-members=REQUEST,acl_users,aq_parent,"[a-zA-Z]+_set{1,2}",save,delete
Expand Down
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,7 @@ You must first have these set up and ready to go:
```
12. Go to [Django admin for DocumentCloud](https://api.dev.documentcloud.org/admin) and add the required static [flat page](https://api.dev.documentcloud.org/admin/flatpages/flatpage/) called `/tipofday/`. It can be blank. Do not prefix the URL with `/pages/`. Specifying the `Site` as `example.com` is alright.
13. Create an initial Minio bucket to simulate AWS S3 locally:
- Reference your DocumentCloud `.django` file for these variables:
- Visit the `MINIO_URL` with a browser, likely at [this address](http://minio.documentcloud.org:9000), and login with the minio `MINIO_ACCESS_KEY` and `MINIO_SECRET_KEY`
- At the bottom right corner click the round plus button and then click the first circle that appears above it to "create bucket".
- Create a bucket called `documents`
- Run `inv initialize-minio`
14. Upload a document:
- **Check your memory allocation on Docker is at least 7gb.** A sign that you do not have enough memory allocated is if containers are randomly failing or if your system is swapping heavily, especially when uploading documents.
- The "upload" button should not be grayed out (if it is, check your user organization Verified Journalist status above)
Expand Down
29 changes: 10 additions & 19 deletions compose/local/django/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,33 +1,27 @@
# FROM python:3.6-alpine
FROM matthewfeickert/docker-python3-ubuntu:3.10.5

FROM python:3.12-slim
ENV PYTHONUNBUFFERED 1

USER root

RUN apt-get -qq -y update && \
apt-get -qq -y install \
# Pip dependencies
python3-pip \
# Build dependencies
build-essential \
# Postgres dependencies
libpq-dev postgresql-client python3-psycopg2 \
# Tesseract dependencies
libjpeg-turbo8 libtiff5 \
libjpeg62-turbo libtiff6 \
# LibreOffice dependencies
libnss3-dev libcurl4-nss-dev libxslt1-dev libpixman-1-0 libxcb-render0-dev && \
# Symlink bash and python
ln -sf bash /bin/sh && rm -f /usr/bin/python && \
ln -s /usr/bin/python3 /usr/bin/python && \
curl https://bootstrap.pypa.io/pip/3.6/get-pip.py -o get-pip.py && \
python3 get-pip.py --force-reinstall
libnss3-dev libcurl4-openssl-dev libxslt1-dev libpixman-1-0 libxcb-render0-dev \
# Utilities
curl git

# set up makecert root CA
RUN curl http://localhost/rootCA.pem > /usr/local/share/ca-certificates/rootCA.crt && update-ca-certificates

# Requirements are installed here to ensure they will be cached.
COPY ./requirements /requirements
# RUN pip install --upgrade pip && pip install -r /requirements/local.txt
RUN pip install -r /requirements/local.txt

RUN pip install --upgrade "pip>=26,<27" && pip install -r /requirements/local.txt

COPY ./compose/production/django/entrypoint /entrypoint
RUN sed -i 's/\r//' /entrypoint && chmod +x /entrypoint
Expand All @@ -48,11 +42,8 @@ RUN sed -i 's/\r//' /start-flower && chmod +x /start-flower
# Entry point
# =-=-=-=-=-=

# Temporary measure to get pip-compile to work
# RUN pip install 'pip<19.2'

WORKDIR /app

ENV LD_LIBRARY_PATH /app/documentcloud/documents/processing/ocr/tesseract

ENTRYPOINT ["/entrypoint"]
ENTRYPOINT ["/entrypoint"]
21 changes: 9 additions & 12 deletions compose/production/django/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@

FROM python:3.6-alpine

FROM python:3.12-slim
ENV PYTHONUNBUFFERED 1

RUN apk update \
RUN apt-get -qq -y update && apt-get -qq -y install \
# psycopg2 dependencies
&& apk add --virtual build-deps gcc python3-dev musl-dev \
&& apk add postgresql-dev \
gcc python3-dev libpq-dev \
# Pillow dependencies
&& apk add jpeg-dev zlib-dev freetype-dev lcms2-dev openjpeg-dev tiff-dev tk-dev tcl-dev \
libjpeg62-turbo zlib1g-dev libfreetype6-dev liblcms2-dev libopenjp2-7-dev libtiff6 tk-dev tcl-dev \
# CFFI dependencies
&& apk add libffi-dev py-cffi
libffi-dev python3-cffi

RUN addgroup -S django \
&& adduser -S -G django django
RUN groupadd -r django \
&& useradd -r -g django django

# Requirements are installed here to ensure they will be cached.
# Requirements are installed here to ensure they will be cached.
COPY ./requirements /requirements
RUN pip install --no-cache-dir -r /requirements/production.txt \
RUN pip install --upgrade "pip>=26,<27" && pip install --no-cache-dir -r /requirements/production.txt \
&& rm -rf /requirements

COPY ./compose/production/django/entrypoint /entrypoint
Expand Down
5 changes: 3 additions & 2 deletions config/aws/lambda/cloud-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
boto3==1.10.14
smart-open==1.8.4
boto3==1.40.61
smart-open==7.5.1
setuptools<81
9 changes: 5 additions & 4 deletions config/aws/lambda/info-and-image-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
boto3==1.21.21
smart-open==1.8.4
pdfplumber==0.5.28
pikepdf==5.4.0
boto3==1.40.61
smart-open==7.5.1
pdfplumber==0.11.9
pikepdf==10.5.1
setuptools<81
Loading
Loading