Skip to content

Commit 0450498

Browse files
committed
work on getting json input to work
1 parent 7f69485 commit 0450498

4 files changed

Lines changed: 229 additions & 26 deletions

File tree

_includes/extractor_new.py

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import abc
99
import datetime
1010

11-
DEBUG=True
11+
DEBUG=False
1212

1313
import argparse
1414

@@ -218,11 +218,12 @@ def md_gen(self, mdart, md0={}):
218218

219219
# Other fields where the key or value requires minor conversion.
220220
elif mdkey == 'runs':
221-
runsSubruns = set()
222-
runs = set()
223-
for run, subrun in mdart.pop("runs", []):
224-
runs.add(run)
225-
runsSubruns.add(100000 * run + subrun)
221+
runsSubruns = []
222+
runs = []
223+
print (mdart['runs'])
224+
for run, subrun, runtype in mdart.pop("runs", []):
225+
if run not in runs: runs.append(run)
226+
# De-duplicate on the combined run/subrun id that is actually stored in the list;
# testing the bare subrun number never matches a stored entry.
run_subrun = 100000 * run + subrun
if run_subrun not in runsSubruns: runsSubruns.append(run_subrun)
226227
md['core.runs'] = runs
227228
md['core.runs_subruns'] = runsSubruns
228229

@@ -346,12 +347,12 @@ def main():
346347
argparser.add_argument('--appversion',help='application version for metadata',type=str)
347348
argparser.add_argument('--appfamily',help='application family for metadata',type=str)
348349
argparser.add_argument('--file_type',help='file_type (mc or detector)',type=str)
349-
argparser.add_argument('--file_format',help='file_format (root, artroot ..)',type=str,required=True)
350+
argparser.add_argument('--file_format',help='file_format (root, artroot ..)',type=str)
350351
argparser.add_argument('--run_type',help='run_type - (fardet-hd, iceberg ...)',type=str)
351352
argparser.add_argument('--campaign',help='Value for dune.campaign for metadata',type=str)
352353
argparser.add_argument('--data_stream',help='Value for data_stream for metadata',type=str)
353-
argparser.add_argument('--data_tier',help='Value for data_tier for metadata',type=str,required=True)
354-
argparser.add_argument('--fcl_file',type=str,help="fcl file name",required=True)
354+
argparser.add_argument('--data_tier',help='Value for data_tier for metadata',type=str)
355+
argparser.add_argument('--fcl_file',type=str,help="fcl file name", default="unknown")
355356
argparser.add_argument('--requestid',help='Value for dune.requestid for metadata',type=str)
356357
#argparser.add_argument('--set_processed',help='Set for parent file as processed in metadata',action="store_true")
357358
argparser.add_argument('--strip_parents',help='Do not include the file\'s parents in metadata for declaration',action="store_true")
@@ -377,21 +378,7 @@ def main():
377378
mddict['metadata']={}
378379
print ("EXTRACTOR: building metadata from parent and args as no artroot dump available")
379380
# If --input_json is supplied, open that dict now and add it to the output json
380-
if args.input_json != None:
381-
if os.path.exists(args.input_json):
382-
try:
383-
arbjson = json.load(open(args.input_json,'r'))
384-
#print ("EXTRACTOR: arbjson",arbjson)
385-
arbjson.pop('name')
386-
arbjson.pop('namespace')
387-
for key in list(arbjson.keys()):
388-
mddict[key] = arbjson[key]
389-
except:
390-
print('Error loading input json file.',args.input_json)
391-
392-
else:
393-
print('warning, could not open the input json file', args.input_json)
394-
381+
395382

396383
if args.appname != None:
397384
mddict['metadata']['core.application.name'] = args.appname
@@ -456,13 +443,39 @@ def main():
456443
print ("EXTRACTOR: inheriting " + key + " from parent file " + thedid)
457444
print ("EXTRACTOR: setting namespace for output",args.namespace)
458445
mddict['namespace']=args.namespace
446+
447+
448+
if args.input_json != None:
449+
if os.path.exists(args.input_json):
450+
try:
451+
arbjson = json.load(open(args.input_json,'r'))
452+
print ("EXTRACTOR: arbjson",arbjson)
453+
#arbjson.pop('name')
454+
#arbjson.pop('namespace')
455+
# A bare string literal is a no-op expression; print it so the debug trace is emitted.
if DEBUG: print("got here")
456+
for key,val in arbjson["metadata"].items():
457+
458+
if DEBUG: print (key, val)
459+
# Only strings can hold environment-variable references. os.path.expandvars raises
# TypeError on non-string JSON values (numbers, lists, null), so pass those through unchanged.
newval = os.path.expandvars(val) if isinstance(val, str) else val
460+
if DEBUG: print (newval)
461+
if key in mddict["metadata"]:
462+
print ("EXTRACTOR: overriding ",key,mddict["metadata"][key],"with", newval, "from json file" )
463+
mddict["metadata"][key] = newval
464+
except:
465+
print('Error loading input json file.',args.input_json)
466+
467+
else:
468+
print('warning, could not open the input json file', args.input_json)
469+
459470
except TypeError:
460471
print('You have not implemented a defineMetaData function by providing an experiment.')
461472
print('No metadata keys will be saved')
462473
raise
463474
# mdtext = json.dumps(expSpecificMetadata.getmetadata(), indent=2, sort_keys=True)
464-
mdtext = json.dumps(mddict, indent=2, sort_keys=True)
465-
475+
476+
if DEBUG:
477+
mdtext = json.dumps(mddict, indent=2, sort_keys=True)
478+
print(mdtext)
466479
# if args.declare:
467480
# ih.declareFile(mdtext)
468481

_includes/job_config.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ export USERF=${USER} # make certain the grid knows who you are
88
export NUM_EVENTS=-1 # process them all
99
export FNALURL='https://fndcadoor.fnal.gov:2880/dune/scratch/users' # sends output to scratch
1010
export NAMESPACE="usertests" # don't change this unless doing production
11+
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
#!/bin/bash
2+
:<<'EOF'
3+
4+
To use this jobscript to process 5 files from the dataset fardet-hd__fd_mc_2023a_reco2__full-reconstructed__v09_81_00d02__standard_reco2_dune10kt_nu_1x2x6__prodgenie_nu_dune10kt_1x2x6__out1__validation
5+
data, put the output logs in the `usertests` namespace, and save the output in /scratch
6+
7+
Use these commands to set up ahead of time:
8+
9+
export DUNE_VERSION=<dune version>
10+
export DUNE_QUALIFIER=<dune qualifier>
11+
export FCL_FILE=<top level fcl>
12+
export INPUT_TAR_DIR_LOCAL=<cvmfs directory returned by cvmfs>
13+
export MQL=<your file query>
14+
export DIRECTORY=<directory name inside the tar file>
15+
16+
(see job_config.sh for the full list)
17+
18+
Use this command to create the workflow:
19+
20+
justin simple-workflow \
21+
--mql "$MQL" \
22+
--jobscript submit_local_code.jobscript.sh --rss-mb 4000 \
23+
--output-pattern "*.root:${FNALURL}/${USERF}" --output-pattern "*.root.json:${FNALURL}/${USERF}" --env APP_TAG=${APP_TAG} --env DIRECTORY=${DIRECTORY} --scope $NAMESPACE --lifetime 30 --env INPUT_TAR_DIR_LOCAL=${INPUT_TAR_DIR_LOCAL} --env DUNE_VERSION=${DUNE_VERSION} --env DUNE_QUALIFIER=${DUNE_QUALIFIER} --env FCL_FILE=${FCL_FILE} --env NUM_EVENTS=${NUM_EVENTS} --env USERF=${USERF} --env NAMESPACE=${NAMESPACE} --description "${DESCRIPTION}"
24+
25+
see job_config.sh for explanations
26+
27+
EOF
28+
29+
# fcl file and DUNE software version/qualifier to be used
30+
FCL_FILE=${FCL_FILE:-${INPUT_TAR_DIR_LOCAL}/${DIRECTORY}/my_code/fcls/my_reco.fcl}
31+
APP_TAG=${APP_TAG:-unknown}
32+
33+
#DUNE_VERSION=${DUNE_VERSION:-v09_85_00d00}
34+
#DUNE_QUALIFIER=${DUNE_QUALIFIER:-e26:prof}
35+
36+
echo "------ set things up -------"
37+
echo "Check environment"
38+
echo "DIRECTORY=$DIRECTORY"
39+
echo "DUNE_VERSION=$DUNE_VERSION"
40+
echo "DUNE_QUALIFIER=$DUNE_QUALIFIER"
41+
echo "FCL_FILE=$FCL_FILE"
42+
echo "MQL=$MQL"
43+
echo "APP_TAG=$APP_TAG"
44+
echo "USERF=$USERF"
45+
echo "NUM_EVENTS=$NUM_EVENTS"
46+
echo "INPUT_TAR_DIR_LOCAL=$INPUT_TAR_DIR_LOCAL"
47+
echo "NAMESPACE=$NAMESPACE"
48+
49+
50+
51+
echo "Current working directory is `pwd`"
52+
53+
54+
# number of events to process from the input file
55+
if [ "$NUM_EVENTS" != "" ] ; then
56+
events_option="-n $NUM_EVENTS"
57+
fi
58+
59+
if [ "$PATCH_RUN" == "YES" ] ; then
60+
# Use ":-" so 99999 is a fallback when JUSTIN_WORKFLOW_ID is unset or empty;
# ":99999" is a substring offset and always produced an empty run id.
runid=${JUSTIN_WORKFLOW_ID:-99999}
61+
run_option="-e ${runid}:0:0"
62+
fi
63+
64+
# First get an unprocessed file from this stage
65+
did_pfn_rse=`$JUSTIN_PATH/justin-get-file`
66+
67+
68+
if [ "$did_pfn_rse" = "" ] ; then
69+
echo "Nothing to process - exit jobscript"
70+
exit 0
71+
fi
72+
73+
# Keep a record of all input DIDs, for pdjson2meta file -> DID mapping
74+
echo "$did_pfn_rse" | cut -f1 -d' ' >>all-input-dids.txt
75+
76+
# pfn is also needed when creating justin-processed-pfns.txt
77+
pfn=`echo $did_pfn_rse | cut -f2 -d' '`
78+
did=`echo $did_pfn_rse | cut -f1 -d' '`
79+
80+
echo "Input PFN = $pfn"
81+
82+
echo "TARDIR ${INPUT_TAR_DIR_LOCAL}"
83+
echo "CODE DIR ${DIRECTORY}"
84+
85+
# Setup DUNE environment
86+
localProductsdir=`ls -c1d ${INPUT_TAR_DIR_LOCAL}/${DIRECTORY}/localProducts*`
87+
88+
echo "localProductsdir ${localProductsdir}"
89+
90+
91+
# seems to require the right name for the setup script
92+
93+
echo " check that there is a setup in ${localProductsdir}"
94+
ls -lrt ${localProductsdir}/setup-grid
95+
ls -lrt ${INPUT_TAR_DIR_LOCAL}/${DIRECTORY}/$FCL_FILE
96+
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
97+
export PRODUCTS="${localProductsdir}/:$PRODUCTS"
98+
99+
# Then we can set up our local products
100+
setup duneana "$DUNE_VERSION" -q "$DUNE_QUALIFIER"
101+
setup dunesw "$DUNE_VERSION" -q "$DUNE_QUALIFIER"
102+
103+
setup metacat
104+
export METACAT_SERVER_URL=https://metacat.fnal.gov:9443/dune_meta_prod/app
105+
export METACAT_AUTH_SERVER_URL=https://metacat.fnal.gov:8143/auth/dune
106+
107+
source ${localProductsdir}/setup-grid
108+
mrbslp
109+
110+
#echo "----- code is set up -----"
111+
112+
# Construct outFile from input $pfn
113+
now=$(date -u +"%Y%m%d%H%M%SZ")
114+
Ffname=`echo $pfn | awk -F/ '{print $NF}'`
115+
fname=`echo $Ffname | awk -F. '{print $1}'`
116+
# outFile1 is artroot format
117+
# outFile2 is root format for analysis
118+
export outFile1=${fname}_${APP_TAG}_${now}.root
119+
export outFile2=${fname}_${APP_TAG}_tuple_${now}.root
120+
121+
# echo "make $outFile1"
122+
campaign="justIN.w${JUSTIN_WORKFLOW_ID}s${JUSTIN_STAGE_ID}"
123+
124+
# Here is where the LArSoft command is call it
125+
(
126+
# Do the scary preload stuff in a subshell!
127+
export LD_PRELOAD=${XROOTD_LIB}/libXrdPosixPreload.so
128+
# echo "$LD_PRELOAD"
129+
130+
131+
132+
#sam_metadata_dumper $pfn
133+
134+
echo "----- now run lar ------"
135+
136+
echo "lar -c ${INPUT_TAR_DIR_LOCAL}/${DIRECTORY}/$FCL_FILE $events_option $run_option -o ${outFile1} -T ${outFile2} "$pfn" > ${fname}_${APP_TAG}_${now}.log 2>&1"
137+
138+
lar -c ${INPUT_TAR_DIR_LOCAL}/${DIRECTORY}/$FCL_FILE $events_option $run_option -o ${outFile1} -T ${outFile2} "$pfn" > ${fname}_${APP_TAG}_${now}.log 2>&1
139+
)
140+
141+
larExit=$?
142+
# Subshell exits with exit code of last command
143+
144+
145+
echo "lar exit code $larExit"
146+
147+
echo '=== Start last 1000 lines of lar log file ==='
148+
tail -1000 ${fname}_${APP_TAG}_${now}.log
149+
echo '=== End last 1000 lines of lar log file ==='
150+
151+
152+
echo "$did" > justin-input-dids.txt
153+
154+
echo "--------make metadata---------"
155+
156+
#sam_metadata_dumper ${outFile1}
157+
158+
echo "python ${INPUT_TAR_DIR_LOCAL}/${DIRECTORY}/extractor_new.py --infile=${outFile1} --appversion=$DUNE_VERSION --appname=${APP_TAG} --appfamily=larsoft --no_crc --inputDidsFile=justin-input-dids.txt --data_tier='full-reconstructed' --file_format='artroot' --fcl_file=${FCL_FILE} --namespace=${NAMESPACE} # > $outFile1.json"
159+
160+
python ${INPUT_TAR_DIR_LOCAL}/${DIRECTORY}/extractor_new.py --infile=$outFile1 --appversion=$DUNE_VERSION --appname=${APP_TAG} --appfamily=larsoft --no_crc --inputDidsFile=justin-input-dids.txt --data_tier='full-reconstructed' --file_format='artroot' --fcl_file=${FCL_FILE} --namespace=${NAMESPACE} #> $outFile1.json
161+
162+
file1Exit=$?
163+
164+
#cat ${outFile1}.json
165+
166+
echo "------------ non-artroot metadata -----------"
167+
# here for non-artroot files, salvage what you can from outFile1
168+
169+
oldjson=${outFile1}.json
170+
171+
echo " python ${INPUT_TAR_DIR_LOCAL}/${DIRECTORY}/extractor_new.py --infile=$outFile2 --appversion=$DUNE_VERSION --appname=${APP_TAG} --appfamily=larsoft --no_crc --inputDidsFile=justin-input-dids.txt --data_tier='root-tuple' --file_format='root' --fcl_file=${FCL_FILE} --no_extract --input_json=${PWD}/${oldjson} --namespace=${NAMESPACE} # > ${outFile2}.json"
172+
173+
python ${INPUT_TAR_DIR_LOCAL}/${DIRECTORY}/extractor_new.py --infile=$outFile2 --appversion=$DUNE_VERSION --appname=${APP_TAG} --appfamily=larsoft --no_crc --inputDidsFile=justin-input-dids.txt --data_tier='root-tuple' --file_format='root' --fcl_file=${FCL_FILE} --no_extract --input_json=${PWD}/${oldjson} --namespace=${NAMESPACE} # > ${outFile2}.json
174+
175+
file2Exit=$?
176+
177+
echo "------- finish up ------"
178+
if [ $larExit -eq 0 ] ; then
179+
# Success !
180+
echo "$pfn" > justin-processed-pfns.txt
181+
jobscriptExit=0
182+
else
183+
# Oh !
184+
jobscriptExit=1
185+
fi
186+
187+
# Create compressed tar file with all log files
188+
tar zcf `echo "$JUSTIN_JOBSUB_ID.logs.tgz" | sed 's/@/_/g'` *.log
189+
exit $jobscriptExit

files/usefulcode.tar

512 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)