WrightonLabCSU
diff --git a/‎.gitmodules‎
Lines changed: 3 additions & 0 deletions b/‎.gitmodules‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 37 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎bin/adjectives.py‎
Lines changed: 117 additions & 102 deletions b/‎bin/adjectives.py‎
Lines changed: 117 additions & 102 deletions
@@ -0,0 +1,3 @@
+[submodule "bin/rule_parser"]
+	path = bin/rule_parser
+	url = https://github.com/WrightonLabCSU/Rule-Parser
@@ -2,6 +2,43 @@
 
 All notable changes to this project will be documented in this file.
 
+## 2.0.0-beta23 - 2026-02-01
+
+[348e276](https://github.com/WrightonLabCSU/DRAM/commit/348e2764de7b666c0f6dcc1d82deea67d96e439b)...[852e0ae](https://github.com/WrightonLabCSU/DRAM/commit/852e0ae18ae5976e8594e8f68ed1938f7eafce9d)
+
+### Features
+
+- Add new DRAM rule parser submodule for traits and distill ([5e9a088](https://github.com/WrightonLabCSU/DRAM/commit/5e9a08872b590c3ac372047db46ddbaa946b8814))
+
+New Python Lark-based rule parser that defines a rule grammar
+for traits that can be reused for distill and product.
+This rule parser is more accurate and less error-prone than
+the older, completely custom-coded traits parser. This fixes
+a number of bugs where things were being double counted and not
+parsed correctly. This also allows distill, traits, and product
+eventually to all use the same rule parsing code. Rule parsing
+documentation can be found in docs/rules_parser.md or
+https://dramit.readthedocs.io/en/latest/rules_parser.html
+
+The rule parser is implemented with Polars instead of pandas for a
+few reasons. It is a bit faster, and it allows lazy query planning.
+The annotation df is currently eager rather than lazy, but there are
+plans to make it lazy. Lazy DataFrames can be more memory efficient
+by only loading the data/columns needed, and doing query optimization
+to speed up and require less memory for intermediate steps.
+- Update summarize eco Ag sheet to newly developed Ag sheet ([4937f1c](https://github.com/WrightonLabCSU/DRAM/commit/4937f1ce846b3910e5fe18264f9daca8221dd207))
+
+
+- Add carbon rules to traits, save traits as excel sheet ([02de665](https://github.com/WrightonLabCSU/DRAM/commit/02de6657261fb5517b3df20985285a6b2b392fa7))
+
+
+
+### Package
+
+- Package cleanup. Removing old files and cleanups ([c6bae64](https://github.com/WrightonLabCSU/DRAM/commit/c6bae64a787623721ff402a38afd94bab63eb12d))
+
+
+
 ## 2.0.0-beta22 - 2025-12-17
 
 [64da24e](https://github.com/WrightonLabCSU/DRAM/commit/64da24e51367feaffe2f643dfbb0aa602e28c5c5)...[0933424](https://github.com/WrightonLabCSU/DRAM/commit/093342450e6e49c1a706cdf2c181bd931bab7a6d)
 
@@ -3,13 +3,10 @@
 import os
 import ast
 import click
+import polars as pl
 
-import pandas as pd
-
-from rule_adjectives.rule_graph import RuleParser, get_positive_genes
-from rule_adjectives.annotations import Annotations
-from utils.click_utils import validate_comma_separated
-
+from rule_parser.src.rules import evaluate_rules_on_anno
+from utils.excel import write_summarized_genomes_to_xlsx
 
 class PythonLiteralOption(click.Option):
 
@@ -20,7 +17,7 @@ def type_cast_value(self, ctx, value):
             raise click.BadParameter(value)
 
 
-def get_package_path(local_path):
+def get_assets_path(local_path):
     """
     Locate the package data or non python files
 
@@ -29,117 +26,135 @@ def get_package_path(local_path):
     """
     abs_snake_path = os.path.join(os.path.dirname(
         os.path.abspath(__file__)),
-                                  "rule_adjectives",
+                                  "assets",
         local_path)
     return abs_snake_path
 
 
-def list_adjectives(ctx, param, value):
-    if not value or ctx.resilient_parsing:
-        return
-    rules = RuleParser(get_package_path('rules.tsv'), verbose=False)
-    print("In the current rules file, these adjectives are available:")
-    for i in rules.data.index[~rules.data['name'].isna()].unique():
-        print(i)
+# def list_adjectives(ctx, param, value):
+#     if not value or ctx.resilient_parsing:
+#         return
+#     rules = RuleParser(get_assets_path('traits_rules.tsv'), verbose=False)
+#     print("In the current rules file, these adjectives are available:")
+#     for i in rules.data.index[~rules.data['name'].isna()].unique():
+#         print(i)
 
 
-def list_adjective_name(ctx, param, value):
-    if not value or ctx.resilient_parsing:
-        return
-    rules = RuleParser(get_package_path('rules.tsv'), verbose=False)
-    print("In the current rules file, these adjectives are available:")
-    for i in rules.data['name'].unique():
-        print(i)
+# def list_adjective_name(ctx, param, value):
+#     if not value or ctx.resilient_parsing:
+#         return
+#     rules = RuleParser(get_assets_path('traits_rules.tsv'), verbose=False)
+#     print("In the current rules file, these adjectives are available:")
+#     for i in rules.data['name'].unique():
+#         print(i)
 
-def show_rules_path(ctx, param, value):
-    if not value or ctx.resilient_parsing:
-        return
-    print(get_package_path('rules.tsv'))
+# def show_rules_path(ctx, param, value):
+#     if not value or ctx.resilient_parsing:
+#         return
+#     print(get_assets_path('traits_rules.tsv'))
 
 
 @click.command()
 @click.option('--annotations', type=click.Path(exists=True), required=True, help="One of only 2 required files. Path to a DRAM annotations file.")
-@click.option('-o', '--output', type=click.Path(), default='adjectives.tsv', help="Path for the output table. A true false table created by this script.")
-@click.option('-a', '--adjectives_list', default="", callback=validate_comma_separated, help="A comma seperated list of adjectives ('adj1,adj2,adj3'), by name, to evaluate. This limits the number of adjectives that are evaluated and is faster.")
-@click.option('-p', '--plot_adjectives', multiple=True, default=[], help="A list of adjectives, by name, to plot. This limits the number of adjectives that are plotted and is probably needed for speed.")
-@click.option('-g', '--plot_genomes', multiple=True,
-              default=[], )
-@click.option('--plot_path', type=click.Path(exists=False),
-              default=None,
-              help='will become a folder of output plots, no path no plots.')
-@click.option('--strainer_tsv', type=click.Path(exists=False), default=None, help='The path for a tsv that will pass to strainer to filter genes. The only option at this time is pgtb for positive genes that are on true bugs.')
-@click.option('--strainer_type', type=click.Path(exists=False), default=None, help='The type of process that should make the strainer file.')
-@click.option('--debug_ids_by_fasta_to_tsv', type=click.Path(exists=False), default=None,
-              help='This is a tool to debug the list of IDs found by DRAM it is mostly for experts')
+@click.option('-o', '--output', type=click.Path(), default='traits.xlsx', help="Path for the output table. A true false table created by this script.")
 @click.option('--rules_tsv', type=click.Path(exists=True),
-              default=get_package_path('rules.tsv'),
+              default=get_assets_path('traits_rules.tsv'),
               help="This is an optional path to a rules file with strict formatting. It will over write the original rules file that is stored with the script.")
-@click.option('--show_rules_path', is_flag=True, callback=show_rules_path,
-              expose_value=False, is_eager=True,
-              help="Show the path to the default rules path.")
-@click.option('--list_name', is_flag=True, callback=list_adjective_name,
-              expose_value=False, is_eager=True,
-              help="List the names for all adjectives.tsv that are"
-                   " available, you can pass these names to limit the"
-                   " adjectives that are evaluated")
-@click.option('--list_id', is_flag=True, callback=list_adjectives,
-              expose_value=False, is_eager=True,
-              help="List the names for all adjectives.tsv that are"
-                   " available, you can pass these names to limit the"
-                   " adjectives that are evaluated")
+# @click.option('-a', '--adjectives_list', default="", callback=validate_comma_separated, help="A comma seperated list of adjectives ('adj1,adj2,adj3'), by name, to evaluate. This limits the number of adjectives that are evaluated and is faster.")
+# @click.option('-p', '--plot_adjectives', multiple=True, default=[], help="A list of adjectives, by name, to plot. This limits the number of adjectives that are plotted and is probably needed for speed.")
+# @click.option('-g', '--plot_genomes', multiple=True,
+#               default=[], )
+# @click.option('--plot_path', type=click.Path(exists=False),
+#               default=None,
+#               help='will become a folder of output plots, no path no plots.')
+# @click.option('--strainer_tsv', type=click.Path(exists=False), default=None, help='The path for a tsv that will pass to strainer to filter genes. The only option at this time is pgtb for positive genes that are on true bugs.')
+# @click.option('--strainer_type', type=click.Path(exists=False), default=None, help='The type of process that should make the strainer file.')
+# @click.option('--debug_ids_by_fasta_to_tsv', type=click.Path(exists=False), default=None,
+#               help='This is a tool to debug the list of IDs found by DRAM it is mostly for experts')
+# @click.option('--show_rules_path', is_flag=True, callback=show_rules_path,
+#               expose_value=False, is_eager=True,
+#               help="Show the path to the default rules path.")
+# @click.option('--list_name', is_flag=True, callback=list_adjective_name,
+#               expose_value=False, is_eager=True,
+#               help="List the names for all adjectives.tsv that are"
+#                    " available, you can pass these names to limit the"
+#                    " adjectives that are evaluated")
+# @click.option('--list_id', is_flag=True, callback=list_adjectives,
+#               expose_value=False, is_eager=True,
+#               help="List the names for all adjectives.tsv that are"
+#                    " available, you can pass these names to limit the"
+#                    " adjectives that are evaluated")
 # @click.argument('-p', type=click.Path(exists=True))
 def evaluate(annotations:str, output:str,
-             rules_tsv:str=get_package_path('rules.tsv'),
-             adjectives_list:list=None, plot_adjectives:list=None,
-             plot_genomes:list=None,plot_path:str=None,
-             debug_ids_by_fasta_to_tsv:str=None,
-             strainer_tsv:str=None, strainer_type='pgtb'):
+             rules_tsv:str=get_assets_path('traits_rules.tsv'),
+            #  adjectives_list:list=None, plot_adjectives:list=None,
+            #  plot_genomes:list=None,plot_path:str=None,
+            #  debug_ids_by_fasta_to_tsv:str=None,
+            #  strainer_tsv:str=None, strainer_type='pgtb'
+             ):
     """Using a DRAM annotations file make a table of adjectives."""
-    rules = RuleParser(rules_tsv, verbose=False, adjectives=adjectives_list)
-    annotations = Annotations(annotations)
-    adjectives = rules.check_genomes(annotations)
-    if debug_ids_by_fasta_to_tsv is not None:
-        annotations.ids_by_fasta.to_csv(debug_ids_by_fasta_to_tsv, sep='\t')
-        exit()
-    adjectives.to_csv(output, sep='\t')
-    # annotations.ids_by_fasta.iloc[1]['annotations']
-    if plot_path is not None:
-        rules.plot_cause(plot_path, adjectives=plot_adjectives,
-                         genomes=plot_genomes, show_steps=False
-                         )
-    if strainer_tsv is not None:
-        strainer_data = get_positive_genes(rules, annotations, adjectives)
-        strainer_data.to_csv(strainer_tsv, sep='\t')
-
-
-@click.command()
-@click.argument('plot_path', type=click.Path(exists=False),
-                default=None)#, help='will become a folder of output plots, no path no plots.')
-@click.option('-a', '--adjectives', multiple=True, default=[], help="A list of adjectives, by name, to evaluate. This limits the number of adjectives that are evaluated and is faster.")
-@click.option('--rules_tsv', type=click.Path(exists=True),
-              default=get_package_path('rules.tsv'),
-              help='The path that will become a folder of output plots, no path no plots.') # , help='The rules file which adhere to strict formating' )
-@click.option('--list_name', is_flag=True, callback=list_adjective_name,
-              expose_value=False, is_eager=True,
-              help="List the names for all adjectives.tsv that are"
-                   " available, you can pass these names to limit the"
-                   " adjectives that are evaluated")
-@click.option('--show_rules_path', is_flag=True, callback=show_rules_path,
-              expose_value=False, is_eager=True,
-              help="Show the path to the default rules path.")
-@click.option('--list_id', is_flag=True, callback=list_adjectives,
-              expose_value=False, is_eager=True,
-              help="List the names for all adjectives.tsv that are"
-                   " available, you can pass these names to limit the"
-                   " adjectives that are evaluated")
-# @click.argument('-p', type=click.Path(exists=True))
-def rule_plot(rules_tsv:str=get_package_path('rules.tsv'),
-              adjectives:list=None, plot_path:str=None):
-    """
-    Using a DRAM annotations file make a table of adjectives.
-    """
-    rules = RuleParser(rules_tsv, verbose=False, adjectives=adjectives)
-    rules.plot_rule(plot_path)
+    rules = pl.read_csv(rules_tsv, separator="\t")
+
+    dfs = {}
+    sample_col = "input_fasta"
+    group_by_col = "topic_ecosystem"
+    if group_by_col not in rules.columns:
+        rules = rules.with_columns(
+            pl.lit("traits").alias(group_by_col)
+        )
+    for name, data in rules.group_by(group_by_col):
+        dfs[name[0]] = evaluate_rules_on_anno(
+            rules=data.lazy(),
+            annotations_path=annotations,
+            sample_col=sample_col,
+        )
+    write_summarized_genomes_to_xlsx(
+        df=None,
+        output_file=output,
+        group_by=group_by_col,
+        sort_order_columns=sample_col,
+        extra_frames=dfs
+    )
+
+# Maybe someday we will add all of this back in?
+
+    # if plot_path is not None:
+    #     rules.plot_cause(plot_path, adjectives=plot_adjectives,
+    #                      genomes=plot_genomes, show_steps=False
+    #                      )
+    # if strainer_tsv is not None:
+    #     strainer_data = get_positive_genes(rules, annotations, adjectives)
+    #     strainer_data.to_csv(strainer_tsv, sep='\t')
+
+
+# @click.command()
+# @click.argument('plot_path', type=click.Path(exists=False),
+#                 default=None)#, help='will become a folder of output plots, no path no plots.')
+# @click.option('-a', '--adjectives', multiple=True, default=[], help="A list of adjectives, by name, to evaluate. This limits the number of adjectives that are evaluated and is faster.")
+# @click.option('--rules_tsv', type=click.Path(exists=True),
+#               default=get_assets_path('traits_rules.tsv'),
+#               help='The path that will become a folder of output plots, no path no plots.') # , help='The rules file which adhere to strict formating' )
+# @click.option('--list_name', is_flag=True, callback=list_adjective_name,
+#               expose_value=False, is_eager=True,
+#               help="List the names for all adjectives.tsv that are"
+#                    " available, you can pass these names to limit the"
+#                    " adjectives that are evaluated")
+# @click.option('--show_rules_path', is_flag=True, callback=show_rules_path,
+#               expose_value=False, is_eager=True,
+#               help="Show the path to the default rules path.")
+# @click.option('--list_id', is_flag=True, callback=list_adjectives,
+#               expose_value=False, is_eager=True,
+#               help="List the names for all adjectives.tsv that are"
+#                    " available, you can pass these names to limit the"
+#                    " adjectives that are evaluated")
+# # @click.argument('-p', type=click.Path(exists=True))
+# def rule_plot(rules_tsv:str=get_assets_path('traits_rules.tsv'),
+#               adjectives:list=None, plot_path:str=None):
+#     """
+#     Using a DRAM annotations file make a table of adjectives.
+#     """
+#     rules = RuleParser(rules_tsv, verbose=False, adjectives=adjectives)
+#     rules.plot_rule(plot_path)
 
 if __name__ == "__main__":
     evaluate()
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[submodule "bin/rule_parser"]`
	`2`	`+ path = bin/rule_parser`
	`3`	`+ url = https://github.com/WrightonLabCSU/Rule-Parser`