-
Notifications
You must be signed in to change notification settings - Fork 0
Add Form 8-K parsing and event storage infrastructure #68
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -172,6 +172,11 @@ import { | |||||||
| CompanyFactsPrimaryKeyNames, | ||||||||
| CompanyFactsSchema, | ||||||||
| } from "../storage/facts/CompanyFactsSchema"; | ||||||||
| import { | ||||||||
| FORM_8K_EVENT_REPOSITORY_TOKEN, | ||||||||
| Form8KEventPrimaryKeyNames, | ||||||||
| Form8KEventSchema, | ||||||||
| } from "../storage/form-8k-event/Form8KEventSchema"; | ||||||||
|
|
||||||||
| export function resetDependencyInjectionsForTesting() { | ||||||||
| // Initialize Company repositories | ||||||||
|
|
@@ -432,4 +437,14 @@ export function resetDependencyInjectionsForTesting() { | |||||||
| ["entity_type", "entity_id"], | ||||||||
| ]) | ||||||||
| ); | ||||||||
|
|
||||||||
| // Initialize Form 8-K Event repository | ||||||||
| globalServiceRegistry.registerInstance( | ||||||||
| FORM_8K_EVENT_REPOSITORY_TOKEN, | ||||||||
| new InMemoryTabularStorage(Form8KEventSchema, Form8KEventPrimaryKeyNames, [ | ||||||||
| ["cik", "filing_date"], | ||||||||
| ["item_code"], | ||||||||
|
||||||||
| ["item_code"], | |
| ["item_code"], | |
| ["cik", "accession_number"], |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| /** | ||
| * @license | ||
| * Copyright 2025 Steven Roussey <sroussey@gmail.com> | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| import { Type, Static } from "typebox"; | ||
| import { ENTITY_NAME_TYPE, SCHEMA_VERSION_TYPE, CIK_TYPE } from "../FormSchemaUtil"; | ||
|
|
||
/** Submission types covered by this schema: "8-K" and "8-K/A". */
export const SubTypeList = Type.Union([Type.Literal("8-K"), Type.Literal("8-K/A")], {
  description: "Submission Type Form",
});

/** A single signer entry; only the name is required. */
const SIGNER_TYPE = Type.Object({
  signatureName: Type.String({ minLength: 1, maxLength: 150 }),
  signatureTitle: Type.Optional(Type.String({ maxLength: 150 })),
  signatureDate: Type.Optional(Type.String()),
});

export type Form8KSignature = Static<typeof SIGNER_TYPE>;

/** Reported items; `item` is either one string or an array of strings. */
const ITEM_LIST_TYPE = Type.Object({
  item: Type.Union([Type.String(), Type.Array(Type.String())]),
});

/** Header section, carrying the optional filer identification fields. */
const HEADER_TYPE = Type.Object({
  filerInfo: Type.Optional(
    Type.Object({
      filerCik: Type.Optional(CIK_TYPE),
      filerCcc: Type.Optional(Type.String({ maxLength: 8 })),
    })
  ),
});

/** Form body: items, period of report, and the signature block (all optional). */
const BODY_TYPE = Type.Object({
  items: Type.Optional(ITEM_LIST_TYPE),
  periodOfReport: Type.Optional(Type.String()),
  signatureBlock: Type.Optional(
    Type.Object({
      // Either one signer entry or an array of signer entries.
      signature: Type.Union([SIGNER_TYPE, Type.Array(SIGNER_TYPE)]),
    })
  ),
});

/**
 * Schema for 8-K filings submitted as structured XML through EDGAR.
 * Every top-level section is optional.
 */
export const Form8KSchema = Type.Object({
  schemaVersion: Type.Optional(SCHEMA_VERSION_TYPE),
  submissionType: Type.Optional(SubTypeList),
  headerData: Type.Optional(HEADER_TYPE),
  formData: Type.Optional(BODY_TYPE),
});

export type Form8K = Static<typeof Form8KSchema>;

/** Wrapper object that holds the form under the `edgarSubmission` key. */
export const Form8KSubmissionSchema = Type.Object({
  edgarSubmission: Form8KSchema,
});

export type Form8KSubmission = Static<typeof Form8KSubmissionSchema>;
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,134 @@ | ||
| /** | ||
| * @license | ||
| * Copyright 2025 Steven Roussey <sroussey@gmail.com> | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| import { Form8KEventRepo } from "../../../storage/form-8k-event/Form8KEventRepo"; | ||
| import { Form8KEvent } from "../../../storage/form-8k-event/Form8KEventSchema"; | ||
| import { PersonRepo } from "../../../storage/person/PersonRepo"; | ||
| import { CompanyRepo } from "../../../storage/company/CompanyRepo"; | ||
| import { hasCompanyEnding } from "../../../storage/company/CompanyNormalization"; | ||
| import { Form8K, Form8KSignature } from "./Form_8_K.schema"; | ||
| import { Form_8_K_ITEMS } from "./Form_8_K"; | ||
|
|
||
| const RELATION_TYPE_SIGNATURE = "form-8k:signature"; | ||
|
|
||
/**
 * Collects the distinct 8-K item codes reported for a filing.
 *
 * Codes come from two places: the filing index metadata string, which is split
 * on commas and semicolons (e.g., "2.02,9.01"), and the `formData.items.item`
 * value of the parsed XML, which may be a single string or an array of strings.
 * Every code is trimmed, blank entries are dropped, and duplicates collapse.
 *
 * @param filingItems - Delimited item codes from the filing metadata, if any.
 * @param form8K - Parsed 8-K form whose items are merged in.
 * @returns The unique item codes in default (lexicographic) sort order.
 */
function extractItemCodes(filingItems: string | undefined | null, form8K: Form8K): string[] {
  // Codes listed in the filing index metadata.
  const metadataCodes = filingItems ? filingItems.split(/[,;]/) : [];

  // Codes from the parsed XML; normalize the string-or-array shape to an array.
  const parsedItems = form8K.formData?.items?.item;
  const xmlCodes = !parsedItems ? [] : Array.isArray(parsedItems) ? parsedItems : [parsedItems];

  const uniqueCodes = new Set(
    [...metadataCodes, ...xmlCodes].map((code) => code.trim()).filter((code) => code !== "")
  );

  return Array.from(uniqueCodes).sort();
}
|
|
||
/**
 * Saves the signer of an 8-K filing and registers it as a related entity.
 *
 * A signer whose name has a company ending (per `hasCompanyEnding`) is saved
 * through `CompanyRepo`; any other signer is saved through `PersonRepo`. The
 * saved entity's hash id is then passed to `saveRelatedEntity` together with
 * the "form-8k:signature" relation type, the CIK, and a one-element title list
 * holding the signature title, or "Signer" when the title is missing or empty.
 * Nothing is saved when the signature has no name.
 *
 * @param cik - CIK passed along with the related-entity record.
 * @param signature - One signature entry from the form's signature block.
 */
async function processSignature(
  cik: number,
  signature: Form8KSignature
): Promise<void> {
  const companies = new CompanyRepo();
  const people = new PersonRepo();

  const { signatureName: name, signatureTitle: title } = signature;
  if (!name) {
    return;
  }

  const titles = [title || "Signer"];

  if (!hasCompanyEnding(name)) {
    const person = await people.savePerson({ name });
    await people.saveRelatedEntity(person.person_hash_id, RELATION_TYPE_SIGNATURE, cik, titles);
    return;
  }

  const company = await companies.saveCompany(name);
  await companies.saveRelatedEntity(
    company.company_hash_id,
    RELATION_TYPE_SIGNATURE,
    cik,
    titles
  );
}
|
|
||
/**
 * Stores the event items and signers of a single Form 8-K filing.
 *
 * One `Form8KEvent` is saved for each item code returned by `extractItemCodes`
 * (filing metadata merged with the parsed XML). Signatures found in the parsed
 * XML are then processed one at a time; a failure on one signature is logged
 * as a warning and does not stop the remaining signatures.
 *
 * @param cik - CIK stored on each event and passed to signature processing.
 * @param accession_number - Accession number stored on each event.
 * @param filing_date - Filing date stored on each event.
 * @param form - Form type; "8-K/A" marks the events as amendments.
 * @param items - Delimited item codes from the filing metadata, if any.
 * @param report_date - Report date from the filing metadata, used when the XML
 *   has no usable period of report.
 * @param form8K - Parsed 8-K form data.
 */
export async function processForm8K({
  cik,
  accession_number,
  filing_date,
  form,
  items,
  report_date,
  form8K,
}: {
  cik: number;
  accession_number: string;
  filing_date: string;
  form: string;
  items: string | undefined | null;
  report_date: string | undefined | null;
  form8K: Form8K;
}): Promise<void> {
  const eventRepo = new Form8KEventRepo();
  const isAmendment = form === "8-K/A";

  // Use period of report from XML if available, fallback to filing metadata.
  // Blank values count as missing: `??` would keep an empty string and hide
  // the metadata fallback, so `||` is used on the trimmed values instead.
  const xmlReportDate = form8K.formData?.periodOfReport?.trim();
  const effectiveReportDate = xmlReportDate || report_date?.trim() || null;

  // Extract and store individual 8-K event items
  const itemCodes = extractItemCodes(items, form8K);

  for (const itemCode of itemCodes) {
    const event: Form8KEvent = {
      cik,
      accession_number,
      item_code: itemCode,
      item_description: Form_8_K_ITEMS[itemCode] ?? null,
      filing_date,
      report_date: effectiveReportDate,
      is_amendment: isAmendment,
    };
    await eventRepo.saveEvent(event);
  }

  // Process signatures from XML form data (if available)
  const signatures = form8K.formData?.signatureBlock?.signature;
  if (signatures) {
    const signatureArray = Array.isArray(signatures) ? signatures : [signatures];

    for (const signature of signatureArray) {
      try {
        await processSignature(cik, signature);
      } catch (error) {
        // Best effort: one bad signature must not abort the rest of the filing.
        console.warn(`Failed to process 8-K signature:`, signature, error);
      }
    }
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `form_8k_events` storage is indexed on `["cik","filing_date"]` and `["item_code"]`, but `Form8KEventRepo.getEventsByAccession()` queries by `{ cik, accession_number }`. Without an index that includes `accession_number`, this query will likely degrade to a full scan. Add an index such as `["cik","accession_number"]` (and optionally `["accession_number"]` / `["cik"]`, depending on expected query patterns).