Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# (c) Stefan Countryman 2016-2018

"""
Utilities for working with a Run Directory, i.e. a directory containing
multiple subdirectories, one per event. Includes tools for automatically
finding events that can be updated and keeping them updated. Useful for running
a pipeline or batch job.
"""
10import re
11import json
12from textwrap import dedent
13from datetime import datetime
14import logging
15import os
16from argparse import Action, Namespace
17from os.path import isfile, join
18from collections import namedtuple
19from fnmatch import fnmatch
20from glob import glob
21from llama.classes import (
22 ImmutableDict,
23 IntentException,
24 CoolDownException,
25)
26from llama.pipeline import DEFAULT_PIPELINE
27from llama.event import Event
28from llama.utils import (
29 archive_figs,
30 PAST_RUNS_DIR,
31 DEFAULT_RUN_DIR,
32 COLOR,
33 GenerationError,
34)
35from llama.vetoes import VetoException
36from llama.cli import CliParser
37from llama.pipeline import Parsers as PipeParsers
LOGGER = logging.getLogger(__name__)

# glob used for the ``eventid`` when a run path names no event pattern
DEFAULT_EVENT_GLOB = '*'


def _lvk_sortkey(event):
    """Return ``event.eventid`` with the letter suffix of any
    letters-digits-letters name left-padded with ``0`` to four characters, so
    that plain string comparison puts e.g. a ``...z`` suffix before
    ``...aa``."""
    return re.sub('([a-zA-Z]+[0-9]+)([a-zA-Z]+)',
                  lambda match: f'{match.group(1)}{match.group(2):0>4}',
                  event.eventid)


# named sort keys; each value takes an event and returns a sortable value
SORTKEYS = ImmutableDict({
    'mtime': lambda event: event.modification_time(),
    'ctime': lambda event: event.change_time(),
    'v0time': lambda event: event.v0_time(),
    'lvk': _lvk_sortkey,
})
def downselect_events(events, **kwargs):
    """Filter an iterable of events using the criteria described in
    ``Run.downselect``, then optionally sort and truncate the result.

    The keyword arguments ``invert``, ``sortkey``, ``limit`` and ``reverse``
    control inversion, sorting and truncation; every other keyword must name
    one of the checks defined below and supplies that check's query value.
    An event is kept when all checks pass (or, with ``invert``, when at least
    one check fails). Returns a new list.
    """
    # a single "now" is fixed per call so all relative-time checks agree
    now = datetime.now().timestamp()

    def flag_matches(flagname):
        """Build a check comparing a ``'true'``-valued flag to a boolean."""
        return lambda ev, wanted: (ev.flags[flagname] == 'true') == wanted

    def mod_before(ev, stamp):
        return ev.modification_time() < stamp

    def mod_after(ev, stamp):
        return not mod_before(ev, stamp)

    def v0_before(ev, stamp):
        # a missing v0 time is replaced by a value that fails the comparison
        return (ev.v0_time() or stamp + 1) < stamp

    def v0_after(ev, stamp):
        return (ev.v0_time() or stamp - 1) > stamp

    # each check takes the event and the query value given for that keyword
    checks = {
        'eventid_filter': lambda ev, pattern: fnmatch(ev.eventid, pattern),
        'fileexists': lambda ev, fname: isfile(join(ev.eventdir, fname)),
        'fhexists': lambda ev, fhclass: fhclass(ev).exists(),
        'fhnameexists': lambda ev, fhname: ev.files[fhname].exists(),
        'fhmeta': lambda ev, fhclass: any(
            isfile(path) for path in fhclass(ev).meta.fullpaths),
        'fhnamemeta': lambda ev, fhname: any(
            isfile(path) for path in ev.files[fhname].meta.fullpaths),
        'vetoed': flag_matches('VETOED'),
        'manual': flag_matches('MANUAL'),
        'v0before': v0_before,
        'v0after': v0_after,
        'modbefore': mod_before,
        'modafter': mod_after,
        'sec_since_mod_gt': lambda ev, secs: mod_before(ev, now - secs),
        'sec_since_mod_lt': lambda ev, secs: mod_after(ev, now - secs),
        'sec_since_v0_gt': lambda ev, secs: v0_before(ev, now - secs),
        'sec_since_v0_lt': lambda ev, secs: v0_after(ev, now - secs),
    }

    # strip the control arguments; whatever remains names checks to apply
    reverse = kwargs.pop('reverse', False)
    limit = kwargs.pop('limit', None)
    sortkey = kwargs.pop('sortkey', None)
    flip = bool(kwargs.pop('invert', False))

    chosen = []
    for candidate in events:
        # ``all`` stops at the first failing check, in keyword order
        passed = all(checks[name](candidate, query)
                     for name, query in kwargs.items())
        if passed != flip:
            chosen.append(candidate)

    if sortkey is not None:
        chosen.sort(key=sortkey, reverse=reverse)
    return chosen[:limit]
def past_runs(paths=(PAST_RUNS_DIR,), pipeline=DEFAULT_PIPELINE):
    """Collect a ``Run`` for every subdirectory of the given directories.

    Parameters
    ----------
    paths : tuple, optional
        Directories in which to search for past run directories.
    pipeline : llama.pipeline.Pipeline
        The pipeline to use for the returned ``Run`` instances in ``runs``.

    Returns
    -------
    runs : dict
        Maps each run directory path, as matched under ``paths`` with the
        trailing separator removed, to the corresponding ``Run``. A key is
        absolute only if the entry of ``paths`` it was found under is.
    """
    runs = {}
    for parent in paths:
        for subdir in glob(os.path.join(parent, '*/')):
            # the pattern ends in a slash, so every match does too; the head
            # returned by ``os.path.split`` is the path without it
            rundir = os.path.split(subdir)[0]
            runs[rundir] = Run(rundir=rundir, pipeline=pipeline)
    return runs
RunTuple = namedtuple("RunTuple", ("rundir", "pipeline", "downselection"))


class Run(RunTuple):
    """
    A single directory containing multiple event directories combined with a
    pipeline (i.e. a selection of analysis steps to use) and a set of
    downselection criteria for picking events:

        Run Directory
        ├─ Event Directory 1
        ├─ Event Directory 2
        └─ Event Directory 3

    This should ordinarily correspond to a run of some sort (an observing run,
    engineering run, offline run, test run, etc.) where the events are somehow
    related. Since this class mostly just provides methods for organizing and
    selecting ``Event`` instances with tailored ``Pipeline`` instances, it's up
    to you to decide how to best organize a run. Run objects are immutable to
    simplify hashing and uniqueness checks.

    These tools allow the user to conveniently check on the status of all
    events in a given ``Run``. A dictionary of downselection arguments (as
    fed to ``downselect``) can be used to restrict the set of events that will
    be returned by ``events``.

    Parameters
    ----------
    rundir : str
        The directory where all events are stored. Files for individual
        events are stored in per-event subdirectories of ``rundir``. Will be
        converted to a canonical path with ``os.path.realpath`` to help ensure
        unique ``Run`` definitions.
    pipeline : llama.pipeline.Pipeline, optional
        A ``Pipeline`` instance holding ``FileHandler`` classes that should
        be used for this analysis. Defaults to the main pipeline in
        production use.
    downselection : tuple, optional
        A tuple of dictionaries of keyword arguments of the type passed to
        ``downselect``. The events returned by ``events`` will match these
        downselection criteria with each downselection dict applied in the
        order they appear in this argument (to allow more complex chained
        downselections). You probably don't want to manually specify this;
        a more pythonic way to provide downselection arguments is to use
        the ``downselect`` method to return a downselection from a starting
        ``Run``.
    """

    def __new__(cls, rundir=DEFAULT_RUN_DIR, pipeline=DEFAULT_PIPELINE,
                downselection=tuple()):
        # canonicalize the path so equal directories give equal tuples
        return RunTuple.__new__(cls, os.path.realpath(rundir), pipeline,
                                downselection)

    @property
    def events(self):
        """Return a list of events in this run directory with
        ``self.downselection`` criteria applied (see ``downselect`` for a list
        of possible downselection criteria).

        One ``Event`` is built for every subdirectory of ``self.rundir``;
        each downselection layer is then applied in order. No sorting is
        done here: the result is in ``glob`` order unless one of the
        downselection layers specifies a ``sortkey``.
        """
        found = [Event.fromdir(path, pipeline=self.pipeline)
                 for path in glob(join(self.rundir, '*', ''))]
        for layer in self.downselection:
            found = downselect_events(found, **layer)
        return found

    def update(self, **downselect):
        """
        Get a list of ``Event`` instances matching this ``Run`` instance's
        downselection criteria and update each event directory. Run until all
        events are up-to-date. Will queue files from each event that are ready
        to update, allowing them to be handled in parallel, and will track
        outstanding jobs. Optionally specify a ``FileGraph.downselect``
        downselection argument to pass to each ``FileGraph`` being updated
        (default is to regenerate all files needing regeneration). Be careful
        with this argument, as it will cause file generation attempts for
        matching files without checking whether they need to be generated.

        Returns
        -------
        processed_files : list
            The file handlers whose futures were collected, in the order
            they were collected (including ones that failed or were vetoed).

        Raises
        ------
        RuntimeError
            If a future returns something other than the file handler it was
            queued for.
        """
        # (filehandler, future) pairs for jobs that have been queued
        running_files = []
        # file handlers whose futures have been collected
        processed_files = []
        while True:
            still_running = []
            for filehandler, future in running_files:
                if future.running():
                    still_running.append((filehandler, future))
                    continue
                # NOTE(review): if these are ``concurrent.futures`` futures,
                # one that has not started yet reports ``running()`` as
                # False, so ``result()`` below blocks until it completes;
                # confirm that is intended.
                try:
                    res = future.result()
                    if filehandler != res:
                        raise RuntimeError(
                            "Got the wrong FileHandler back from our "
                            f"executor. Expected {filehandler}, got {res}"
                        )
                    LOGGER.info('%sFinished generating %s%s', COLOR.GREEN,
                                filehandler, COLOR.CLEAR)
                except (IntentException, CoolDownException):
                    # deliberately ignored; the file still counts as
                    # processed for this call
                    pass
                except GenerationError as err:
                    LOGGER.warning('%sGenerationError while generating %s: '
                                   '%s%s', COLOR.YELLOW, filehandler, err,
                                   COLOR.CLEAR)
                except VetoException:
                    LOGGER.info('%sVetoed %s%s',
                                COLOR.GREEN, filehandler, COLOR.CLEAR)
                processed_files.append(filehandler)
            running_files = still_running
            # queue everything not already processed or in flight
            for event in self.events:
                new_files = event.files.downselect(
                    invert=True,
                    equals=processed_files+[fh for fh, _ in running_files],
                )
                running_files += new_files.update(**downselect)
            if not running_files:
                break
        if processed_files:
            LOGGER.info('%sRun.update complete. Processed files: %s%s',
                        COLOR.GREEN, processed_files, COLOR.CLEAR)
        return processed_files

    def downselect(self, **kwargs):
        """
        Get another ``Run`` instance identical to the current one but
        with the following downselection criteria applied to the ``Event``
        instances returned by ``self.events``. Can also specify a sorting
        function and a maximum number of returned values:

        Parameters
        ----------
        invert : bool, optional
            Invert what matches and what doesn't. Default: False
        eventid_filter : str, optional
            A glob (as taken by ``fnmatch``) that the ``eventid`` must match.
        fileexists : str, optional
            The event directory contains a file with this name.
        fhexists : llama.filehandler.FileHandler, optional
            The eventdir contains the file for this FileHandler.
        fhnameexists : str, optional
            The eventdir contains the file for the FileHandler with this name.
        fhmeta : llama.filehandler.FileHandler, optional
            The eventdir contains a metadata rider for the file for this
            FileHandler.
        fhnamemeta : str, optional
            The eventdir contains a metadata rider for the file for the
            FileHandler with this name.
        vetoed : bool, optional
            Whether the events have been vetoed by the VETOED flag or not.
        manual : bool, optional
            Whether the events have been marked as manual by the MANUAL flag.
        modbefore : float, optional
            Select events whose directory modtimes were before this timestamp.
        modafter : float, optional
            Select events whose directory modtimes were after this timestamp.
        sec_since_mod_gt : float, optional
            Select events whose directory modtimes are more than this many
            seconds ago.
        sec_since_mod_lt : float, optional
            Select events whose directory modtimes are less than this many
            seconds ago.
        v0before : float, optional
            Select events whose first event state version was generated before
            this timestamp. Will **IGNORE** directories that do not have any
            versioned files.
        v0after : float, optional
            Select events whose first event state version was generated after
            this timestamp. Will **IGNORE** directories that do not have any
            versioned files.
        sec_since_v0_gt : float, optional
            Select events whose first event state version was generated more
            than this many seconds ago. Will **IGNORE** directories that do not
            have any versioned files.
        sec_since_v0_lt : float, optional
            Select events whose first event state version was generated less
            than this many seconds ago. Will **IGNORE** directories that do not
            have any versioned files.
        sortkey : function, optional
            A function taking ``Event`` instances that can be passed to
            ``sorted`` to sort the downselected ``Event`` instances. Default:
            None (i.e. no sorting)
        reverse : bool, optional
            Whether to reverse the order of sorting (i.e. put the results in
            descending order) before applying ``limit``. Default: False
        limit : int, optional
            Return up to this number of events. Most useful if ``sortkey`` has
            also been provided. Default: None (i.e. no limit)
        """
        if not kwargs:
            # nothing to add, so the same (immutable) instance is returned
            return self
        return type(self)(
            rundir=self.rundir,
            pipeline=self.pipeline,
            downselection=self.downselection + (ImmutableDict(kwargs),)
        )

    def downselect_pipeline(self, invert=False, **kwargs):
        """Return a ``Run`` instance with a pipeline that has been downselected
        using ``Pipeline.downselect``."""
        kwargs['invert'] = invert
        return type(self)(rundir=self.rundir,
                          pipeline=self.pipeline.downselect(**kwargs),
                          downselection=self.downselection)

    @property
    def vis(self):
        """A collection of visualization methods for this ``Run`` instance."""
        return RunVisualization(self)

    def __str__(self):
        name = type(self).__name__
        # continuation line is indented to align under the opening paren
        pad = ' ' * (len(name) + 1)
        return (f'{name}(rundir="{self.rundir}",\n'
                f'{pad}pipeline="{self.pipeline}")')

    def __repr__(self):
        return str(self)
class ParseRunsAction(Action):  # pylint: disable=too-few-public-methods
    """
    Take a bunch of pathnames and parse them into ``Run`` instances with
    associated ``eventid`` glob filters. See ``Parsers`` docstring for
    details.

    Each path is split at its last ``/`` into a run directory and an event
    glob; the resulting ``Run`` (downselected with that glob as
    ``eventid_filter``) is appended to the list stored under ``self.dest``
    in the namespace.
    """

    # paths to use when no value is supplied; subclasses override this
    _default_run = tuple()

    def __call__(self, parser, namespace, values, option_string=None):
        if getattr(namespace, self.dest, None) is None:
            setattr(namespace, self.dest, list())
        runs = getattr(namespace, self.dest)
        if not values:
            values = self._default_run
        if not isinstance(values, (list, tuple)):
            values = [values]
        for path in values:
            if '/' not in path:
                # a bare name is the only case that gets expanded
                path = os.path.realpath(path)
                if path == '/':
                    parser.error("You can't specify the root directory '/' "
                                 "as the ``Event`` directory to manipulate. "
                                 "``Event`` directories must always be "
                                 "subdirectories of a ``Run`` directory. Why "
                                 "would you want to do this anyway?")
                if re.findall(r'^/[^/]*$', path):
                    # a direct child of '/': double the leading slash so the
                    # split below yields '/' rather than an empty run dir
                    path = '/' + path
            assert '/' in path
            splitpath = path.split('/')
            # an empty component falls back to the corresponding default
            rundir = '/'.join(splitpath[:-1]) or DEFAULT_RUN_DIR
            eventidfilt = splitpath[-1] or DEFAULT_EVENT_GLOB
            runs.append(
                Run(rundir=rundir).downselect(eventid_filter=eventidfilt))
        # read back through ``self.dest`` so this works for any destination
        LOGGER.info("Selected run directories: %s", [r.rundir for r in runs])
def postprocess_select_pipeline(_self: CliParser, namespace: Namespace):
    """
    Rebuild every ``Run`` in ``namespace.run`` so that it uses
    ``namespace.pipeline`` (as chosen by ``--pipeline`` and/or
    ``--filehandlers``) instead of the default pipeline. Each run keeps its
    ``rundir`` and ``downselection``; the list is updated in place.
    """
    LOGGER.info("Pipeline specified: %s", namespace.pipeline)
    runs = namespace.run
    for index, previous in enumerate(runs):
        runs[index] = Run(
            rundir=previous.rundir,
            pipeline=namespace.pipeline,
            downselection=previous.downselection,
        )
    LOGGER.info("Selected runs: %s", namespace.run)
def postprocess_dry_run(_self: CliParser, namespace: Namespace):
    """
    If ``--dry-run-dirs`` is true, print the directories that would be
    affected by the given arguments and quit without taking further action. If
    you want to extend this, print more dry run information and then call this
    function to print run/event information before quitting.

    Does nothing when ``namespace.dry_run_dirs`` is false; otherwise prints
    the downselection layers of the first run, then every run directory with
    the basenames of its event directories, and raises ``SystemExit``.
    """
    # local import; ``sys.exit`` is used because the bare ``exit`` builtin is
    # installed by ``site`` and is not guaranteed to exist
    import sys

    if not namespace.dry_run_dirs:
        return
    print("DOWNSELECTIONS IN EFFECT:")
    # only the first run's layers are shown; slicing tolerates an empty list
    for first in namespace.run[:1]:
        for i, downselect in enumerate(first.downselection):
            print(f" - LAYER {i}:")
            for arg, value in downselect.items():
                print(f" {arg}: {json.dumps(value)}")
    print("RUNS AFFECTED:")
    for run in namespace.run:
        print(run.rundir)
        for event in run.events:
            print(f" {os.path.basename(event.eventdir)}")
    sys.exit()
def _parse_downselect_value(text):
    """Convert one ``--downselect`` value string: ``True``/``False`` become
    booleans, then ``int`` and ``float`` are tried in that order, and anything
    else is returned unchanged as a string."""
    if text == 'True':
        return True
    if text == 'False':
        return False
    for convert in (int, float):
        try:
            return convert(text)
        except ValueError:
            continue
    return text


def postprocess_downselect(_self: CliParser, namespace: Namespace):
    """
    If ``namespace.downselect`` is not empty, parse it as a comma-separated
    list of ``key=value`` pairs, where ``value`` is parsed as a boolean if it
    equals either ``True`` or ``False``, as an int or float if it can be
    parsed as such, and as a string otherwise. Use these arguments to
    ``downselect`` each of the runs specified in ``namespace.run`` and store
    the resulting list back on ``namespace.run``.

    Raises
    ------
    ValueError
        If any comma-separated item does not contain ``=``.
    """
    if not namespace.downselect:
        return
    kwargs = {}
    for item in namespace.downselect.split(','):
        # split on the first ``=`` only, so values may contain ``=``
        key, separator, raw = item.partition('=')
        if not separator:
            raise ValueError(
                f"Bad downselect argument {item!r}: expected 'key=value' "
                f"pairs separated by commas, got {namespace.downselect!r}"
            )
        kwargs[key] = _parse_downselect_value(raw)
    namespace.run = [run.downselect(**kwargs) for run in namespace.run]
class PrintDownselectionsAction(Action):
    """
    Print a dedented docstring for ``Run.downselect`` and exit.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # local import; ``sys.exit`` is used because the bare ``exit`` builtin
        # is installed by ``site`` and is not guaranteed to exist
        import sys

        # ``__doc__`` is None when docstrings are stripped (``-OO``)
        print(dedent(Run.downselect.__doc__ or ''))
        sys.exit(0)
# pylint: disable=missing-docstring
class Parsers:
    # The class docstring is assigned explicitly because it is an f-string
    # (default values are interpolated); it doubles as the help epilog of the
    # ``eventfiltering`` parser.
    __doc__ = dedent(f"""
    Specifying Directories
    ----------------------

    Each LLAMA trigger gets its own directory. The name of this directory
    is called the ``eventid`` and the trigger itself is a LLAMA ``Event``
    (see: ``llama.event``). For a given LLAMA run, all event directories
    should go in a commond directory called a "run directory"; the
    collection of events is called a ``Run`` (see: ``llama.run``). Most
    things the pipeline does work on a single ``Run`` and are meant to
    affect one or more matching ``Event`` instances. When you specify
    directories, you are implicitly specifying the ``Run`` (i.e. collection
    of triggers) as well as a UNIX-style glob (like the asterisk matching
    all files, ``*``) which describes the ``eventid`` pattern you want to
    match. For example, matching all event IDs that start with "S"
    (corresponding to O3 LIGO/Virgo superevents) would require using ``S*``
    as your event glob.

    *If you want to explicitly print which currently-existing* ``Event``
    *directories will be impacted by the arguments you provide, you can
    use* ``--dry-run-dirs`` *to print the impacted directories and exit
    without taking further action. This is good practice while getting used
    to this interface.*

    The syntax for specifying the ``Run`` and ``Event`` glob is the path of
    the run directory followed by a slash followed by the event glob with
    **no slash at the end** (be sure to escape the ``*`` so the shell
    doesn't expand it):

    .. code::

        '/run/directory/event*glob'

    Specify **only** the event glob by leaving the run directory out but
    keeping the leading ``/`` (if for some insane reason your root
    directory is your run directory, a double-leading ``/`` will
    communicate your perverse desire). In this case the default ``Run``
    directory ``{DEFAULT_RUN_DIR}`` is implied, so the following are
    equivalent:

    .. code::

        '/event*glob'
        {DEFAULT_RUN_DIR}'event*glob'

    Specify **only** the ``Run`` directory by leaving a trailing slash and
    omitting the event glob; in this case, the default event glob
    ``{DEFAULT_EVENT_GLOB}`` will be used, so the following are equivalent:

    .. code::

        /run/directory/
        /run/directory/'{DEFAULT_EVENT_GLOB}'

    You can use relative paths for the ``Run`` directory, the final part of
    the path will **not** be expanded and will be treated as the base
    directory. The only exception to this is if you are using relative
    paths and don't put *any* ``/`` in the specified path, in which case
    the relative path will be expanded. This allows the common and
    intuitive behavior of running specific events in the current directory
    when you pass their name alone, or alternatively to treat the current
    directory as the only event directory by passing a single ``.`` as the
    run argument. Something like ``./.``, however, will be interpreted as
    meaning you want the *current* directory to be the run directory only
    matching ``Event`` ids of ``.``.

    Specifying Directories: Examples
    --------------------------------

    The following examples assume you are currently in the event directory
    ``/some/directory/``. Let's say this is the event directory, and you
    want to update **only** the contents of this directory. You can specify
    the run as ``/some/`` and the event glob as ``directory`` with either
    of the following commands paths:

    .. code::

        .
        /some/directory

    Alternatively, if ``/some/directory/`` is a run directory, and you want
    to affect the event directories it contains that match the default
    event glob ``{DEFAULT_EVENT_GLOB}``, you can run use any of the
    following (note again that the event glob is in quotes to prevent your
    shell from expanding it into multiple arguments):

    .. code::

        ./
        ./'{DEFAULT_EVENT_GLOB}'
        /some/directory/
        /some/directory/'{DEFAULT_EVENT_GLOB}'

    If you want to use the name of the current directory as your event glob
    (so that only ``eventids`` that have the *same* basename as your
    current directory are used) while **keeping** the default run directory
    ``{DEFAULT_RUN_DIR}``, you would have to place a leading slash followed
    by the actual name of the run directory; as noted above, ``/.`` not
    work because the dot will be treated literally as the eventid you want
    to use. (Note that you usually wouldn't want to do this; why would you
    be in this directory if you want to operate on an event stored in a
    different run directory?):

    .. code::

        /directory
        {DEFAULT_RUN_DIR}directory

    You can further specify which types of events should be processed by
    specifying ``--downselect`` followed by a string to be passed
    as the arguments to ``Run.downselect`` (run ``--print-downselections``
    to see possible options).

    See ``llama.run`` and ``llama.event`` for more information on ``Run``
    and ``Event`` objects.
    """).strip()

    def __init__(
            self,
            downselect=None,
            run=(os.path.join(DEFAULT_RUN_DIR, DEFAULT_EVENT_GLOB),)
    ):
        """Create a new parser collection with the specified ``downselect``
        arguments to be passed to parsed ``Run`` instances (no downselection if
        not provided) and a default list of ``run`` inputs (as typed at the
        command line, i.e. paths with UNIX globs for ``eventid``).
        """
        self.downselect = downselect
        self.run = run

    @property
    def eventfiltering(self):
        """A ``CliParser`` to be used for downselecting runs and events.

        A new parser is built on every access. Its epilog is the
        ``Parsers`` class docstring, and its ``POSTPROCESSORS`` are
        ``postprocess_downselect`` followed by ``postprocess_dry_run``.
        """

        class ParseRunsWithDefaultAction(ParseRunsAction):
            # used when no ``run`` argument is given on the command line
            _default_run = self.run

        # shown in the help; tolerate an empty ``self.run``
        default_run = ((self.run[0] if self.run else None)
                       or DEFAULT_RUN_DIR+DEFAULT_EVENT_GLOB)
        # a bare ``__doc__`` here would be the *module* docstring, since
        # class-scope names are not visible inside methods; the ``run`` help
        # below points readers at the directory syntax in the class docstring
        eventfiltering = CliParser(add_help=False, epilog=Parsers.__doc__)
        fgroup = eventfiltering.add_argument_group(
            'filter runs and events (see: ``llama.run``)')
        arg = fgroup.add_argument
        arg('run', nargs="?", action=ParseRunsWithDefaultAction, help=f"""
            A pattern specifying a list of directories to update of the
            form ``/run/directory/event*glob``. See end of ``llama run -h``
            documentation for details. (default:
            ``{default_run}``)""")
        arg('--dry-run-dirs', action='store_true', help="""
            Print the runs and event directories that would be affected and
            exit without taking further action.""")
        arg('--downselect', default=self.downselect, help=f"""
            Arguments to pass to the ``downselect`` method of runs selected
            by the ``run`` argument (note that ``eventid_filter`` is
            already implicitly set by the glob pattern specified in
            ``run``). Arguments will only be parsed as booleans (if they
            equal "True" or "False"), ints (if they can be parsed as such),
            floats (if they can be parsed as such), or strings and should
            be separated by commas, e.g. ``--downselect
            manual=False,fhnameexists=PAstro``.
            Omit a list of downselections or provide an empty string to
            specify no further downselections beyond the one implied by the
            ``run`` argument. (default: {self.downselect})""")
        arg('--print-downselections', action=PrintDownselectionsAction,
            nargs=0, help="Print available downselections.")
        # downselection has to be applied before the dry run lists events
        eventfiltering.POSTPROCESSORS = (
            postprocess_downselect,
            postprocess_dry_run,
        )
        return eventfiltering

    @property
    def pipeline_and_eventfiltering(self):
        """
        Get a combination of ``llama.pipeline.Parsers.pipeline`` and
        ``llama.run.Parsers.eventfiltering`` processors in the correct order
        and includes the extra step of using the pipeline specified in the
        first parser in the ``Run`` instances returned by the second parser.
        """
        parser = CliParser(
            add_help=False,
            prefix_chars="-+",
            parents=(
                PipeParsers.pipeline,
                self.eventfiltering,
            ),
        )
        parser.POSTPROCESSORS += (postprocess_select_pipeline,)
        return parser
class RunVisualization:
    """Provide methods for visualizing the status of a run directory."""

    def __init__(self, run):
        """Create visualizations for a certain ``Run`` instance."""
        self._run = run

    def _wall_times_by_handler(self):
        """Map each FileHandler name in the run's pipeline to the list of
        ``wall_time`` values of that file across the events that have a
        metadata rider for it."""
        times = dict()
        for fhname in self._run.pipeline.file_handlers:
            collected = list()
            # ``downselect`` returns a ``Run``; its events are on ``.events``
            # (iterating the ``Run`` itself would walk the tuple fields)
            for event in self._run.downselect(fhnamemeta=fhname).events:
                try:
                    collected.append(event.files[fhname].wall_time)
                except KeyError:
                    # best effort: events lacking this entry are skipped
                    pass
            times[fhname] = collected
        return times

    def _finished_counts(self):
        """Return ``(fhnames, done, notdone)``: the FileHandler names and,
        for each, how many events have that file and how many do not."""
        fhnames = list(self._run.pipeline.file_handlers.keys())
        # ``events`` is a property returning a list, so it is not called
        ndirs = len(self._run.events)
        done = [len(self._run.downselect(fhnameexists=name).events)
                for name in fhnames]
        notdone = [ndirs - count for count in done]
        return fhnames, done, notdone

    def wall_times(self, outfile=None):
        """Create histograms of wall times (i.e. how long each file took to
        generate) for each FileHandler in this ``Run`` instance.

        Parameters
        ----------
        outfile : str, optional
            If provided, save all plots as PNG files to a gzipped tarfile with
            this filename.

        Returns
        -------
        plots : dict
            A dictionary of ``matplotlib.figure`` instances whose keys are the
            names of each ``FileHandler`` class and whose values are histograms
            of wall times for each ``FileHandler`` class.
        """
        import matplotlib.pyplot as plt
        plots = dict()
        for fhname, times in self._wall_times_by_handler().items():
            fig = plt.figure()
            plt.hist(times)
            axes = plt.gca()
            axes.set_title("{} Wall Times".format(fhname))
            axes.set_xlabel("Time to Generate File [s]")
            axes.set_ylabel("Count")
            plots[fhname] = fig
        if outfile is not None:
            # figures and names are passed as parallel lists in dict order
            archive_figs(list(plots.values()), outfile, exts=['png'],
                         fname_list=list(plots))
        return plots

    def finished(self, outfile=None):
        """Create a bar plot showing the proportion of complete to incomplete
        files for each FileHandler in this ``Run`` instance.

        Parameters
        ----------
        outfile : str, optional
            If provided, save the plot to this filename.

        Returns
        -------
        fig : matplotlib.figure
            The bar plot figure.
        """
        import matplotlib.pyplot as plt
        fhnames, done, notdone = self._finished_counts()
        ind = range(len(fhnames))
        fig = plt.figure()
        # matplotlib.org/gallery/lines_bars_and_markers/bar_stacked.html
        pdone = plt.bar(ind, done)
        pnotdone = plt.bar(ind, notdone, bottom=done)
        plt.title('Files finished generating (by FileHandler)')
        plt.xlabel('FileHandler name')
        plt.ylabel('Number of files')
        plt.xticks(ind, fhnames, rotation='vertical')
        plt.legend((pdone[0], pnotdone[0]), ('File exists', 'File not made'))
        # ``Tick.label`` no longer exists in current Matplotlib; set the
        # tick label size through the axes instead
        plt.gca().tick_params(axis='x', labelsize=8)
        plt.tight_layout()
        if outfile is not None:
            fig.savefig(outfile)
        return fig