""".. _utils-htc-monitor:HTCondor Monitoring-------------------A module with utility to query the HTCondor queue, processthe returned data and display it nicely.Note---- This module is meant to be called as a script, but some of the individual functionality is made public API and one shoule be able to build a different monitor script from the functions in here."""importreimporttimeimportpendulumfromloguruimportloggerfromrichimportboxfromrich.consoleimportGroupfromrich.liveimportLivefromrich.panelimportPanelfromrich.tableimportTablefrompyhdtoolkit.models.htcimportBaseSummary,ClusterSummary,HTCTaskSummaryfrompyhdtoolkit.utils.cmdlineimportCommandLinefrompyhdtoolkit.utils.loggingimportconfig_loggerconfig_logger(level="ERROR")# ----- Data ----- #TASK_COLUMNS_SETTINGS={"OWNER":{"justify":"left","header_style":"bold","style":"bold","no_wrap":True},"BATCH_NAME":{"justify":"center","header_style":"magenta","style":"magenta","no_wrap":True},"SUBMITTED":{"justify":"center","header_style":"medium_turquoise","style":"medium_turquoise","no_wrap":True,},"DONE":{"justify":"right","header_style":"bold green3","style":"bold green3","no_wrap":True},"RUNNING":{"justify":"right","header_style":"bold cornflower_blue","style":"bold cornflower_blue","no_wrap":True,},"IDLE":{"justify":"right","header_style":"bold dark_orange3","style":"bold dark_orange3","no_wrap":True},"TOTAL":{"justify":"right","style":"bold","no_wrap":True},"JOB_IDS":{"justify":"right","no_wrap":True},}CLUSTER_COLUMNS_SETTINGS={"SOURCE":{"justify":"left","header_style":"bold","style":"bold","no_wrap":True},"JOBS":{"justify":"right","header_style":"bold","style":"bold","no_wrap":True},"COMPLETED":{"justify":"right","header_style":"bold green3","style":"bold green3","no_wrap":True},"RUNNING":{"justify":"right","header_style":"bold cornflower_blue","style":"bold cornflower_blue","no_wrap":True,},"IDLE":{"justify":"right","header_style":"bold dark_orange3","style":"bold 
dark_orange3","no_wrap":True},"HELD":{"justify":"right","header_style":"bold gold1","style":"bold gold1","no_wrap":True},"SUSPENDED":{"justify":"right","header_style":"bold slate_blue1","style":"bold slate_blue1","no_wrap":True},"REMOVED":{"justify":"right","header_style":"bold red3","style":"bold red3","no_wrap":True},}# ----- HTCondor Querying / Processing ----- #

def query_condor_q() -> str:
    """
    .. versionadded:: 0.9.0

    Runs the ``condor_q`` command, which reports the status of
    the caller's jobs, and hands back its decoded output.

    Returns
    -------
    str
        The utf-8 decoded output of the ``condor_q`` command,
        stripped of leading and trailing whitespace.

    Raises
    ------
    ChildProcessError
        If ``condor_q`` finishes with a non-zero return code.
    """
    exit_code, output = CommandLine.run("condor_q")
    decoded_output = output.decode().strip()

    if exit_code != 0:  # the command failed, so there is nothing valid to return
        raise ChildProcessError("Checking htcondor status failed")
    return decoded_output


def read_condor_q(report: str) -> tuple[list[HTCTaskSummary], ClusterSummary]:
    """
    .. versionadded:: 0.9.0

    Splits the information from the different parts of the
    ``condor_q`` command's output into clean, validated data
    structures.

    Parameters
    ----------
    report : str
        The utf-8 decoded string returned by the ``condor_q``
        command, as returned by `query_condor_q`.

    Returns
    -------
    tuple[list[HTCTaskSummary], ClusterSummary]
        A tuple with two elements. The first element is a list
        of each task summary given by ``condor_q``, as a validated
        `~.models.htc.HTCTaskSummary`. The second element is a
        validated `~.models.htc.ClusterSummary` object with the
        scheduler identification and summaries of the user as well
        as all users' statistics on this scheduler cluster.

    Raises
    ------
    ValueError
        If the report lacks the scheduler line or any of the three
        summary lines (query, user, all users), for instance when
        it is empty or truncated.

    Example
    -------
        .. code-block:: python

            condor_q_output = get_the_string_as_you_wish(...)
            tasks, cluster = read_condor_q(condor_q_output)
    """
    tasks: list[HTCTaskSummary] = []
    # Each of these is filled from a dedicated line of the report. They start
    # as None so that a missing line is detected instead of leaving the name
    # unbound when the ClusterSummary is assembled.
    scheduler_id = query_summary = owner_summary = full_summary = None
    in_task_report = False

    for line in report.splitlines():
        if line.startswith("-- Schedd:"):  # scheduler information
            scheduler_id = _process_scheduler_information_line(line)
        elif line.startswith("OWNER"):  # headers line, task reports follow
            in_task_report = True
        elif in_task_report:
            if line not in ("\n", ""):
                tasks.append(_process_task_summary_line(line))
            else:  # an empty line denotes the end of the task report(s)
                in_task_report = False
        # Anything else belongs to the cluster information, only 3 lines
        elif "query" in line:  # first line
            query_summary = _process_cluster_summary_line(line, "query")
        elif "all users" in line:  # last line
            full_summary = _process_cluster_summary_line(line, "all users")
        elif line not in ("\n", ""):  # user line, whoever the user is
            # Without any task report the owner's name is unknown, in which
            # case the wildcard pattern is given to match whichever name is there
            querying_owner = tasks[0].owner if tasks else r"(\D+)"
            owner_summary = _process_cluster_summary_line(line, querying_owner)

    parsed_parts = {
        "scheduler information": scheduler_id,
        "query summary": query_summary,
        "user summary": owner_summary,
        "all users summary": full_summary,
    }
    missing = [name for name, value in parsed_parts.items() if value is None]
    if missing:
        msg = f"Could not parse the condor_q output, missing: {', '.join(missing)}"
        raise ValueError(msg)

    cluster_summary = ClusterSummary(
        scheduler_id=scheduler_id, query=query_summary, user=owner_summary, cluster=full_summary
    )
    return tasks, cluster_summary

# ----- Output Formatting ----- #


def _make_tasks_table(tasks: list[HTCTaskSummary]) -> Table:
    """
    Takes the list of `~.models.htc.HTCTaskSummary` models as
    returned by `read_condor_q` and from the information within
    creates a `rich.table.Table`. Each row of the table represents
    one `HTCTaskSummary` from the input.

    Parameters
    ----------
    tasks : list[HTCTaskSummary]
        A list of `~.models.htc.HTCTaskSummary` models, as parsed
        from the output of the ``condor_q`` command.

    Returns
    -------
    rich.table.Table
        A `rich.table.Table` object with one row per task, ready
        to be displayed by `rich`.
    """
    table = _default_tasks_table()
    date_display_format = "dddd, D MMM YY at LT (zz)"  # example: Wednesday, 21 Apr 21 9:04 PM (CEST)
    for task in tasks:
        table.add_row(
            task.owner,
            str(task.batch_name),
            task.submitted.format(fmt=date_display_format),
            str(task.done),
            str(task.run),
            str(task.idle),
            str(task.total),
            task.job_ids,
        )
    return table


def _make_cluster_table(owner_name: str, cluster: ClusterSummary) -> Table:
    """
    Takes a `~.models.htc.ClusterSummary` model as returned by
    `read_condor_q` and from the information within creates a
    `rich.table.Table` with three rows: the query, the user and
    all users.

    Parameters
    ----------
    owner_name : str
        The name of the user who queried the HTCondor queue,
        used as label of the user's row.
    cluster : ClusterSummary
        A `~.models.htc.ClusterSummary` model, as parsed from
        the output of the ``condor_q`` command.

    Returns
    -------
    rich.table.Table
        A `rich.table.Table` object with the cluster information
        formatted and ready to be displayed by `rich`.
    """
    table = _default_cluster_table()
    summaries = cluster.model_dump()  # dumped once, instead of once per cell
    row_labels = {"query": "Query", "user": owner_name, "cluster": "All Users"}
    # Same order as the columns declared in CLUSTER_COLUMNS_SETTINGS
    fields = ("jobs", "completed", "running", "idle", "held", "suspended", "removed")
    for source, label in row_labels.items():
        table.add_row(label, *(str(summaries[source][field]) for field in fields))
    return table


# ----- Helpers ----- #


def _process_scheduler_information_line(line: str) -> str:
    """
    Extract only the '<cluster>' part of the 'Schedd: <cluster>.cern.ch'
    element of the scheduler information line.

    Parameters
    ----------
    line : str
        The line containing the scheduler information.

    Returns
    -------
    str
        The scheduler name extracted from the input line.

    Raises
    ------
    ValueError
        If no scheduler name can be found in the line.
    """
    # The dots are escaped so that only a literal '.cern.ch' is matched
    result = re.search(r"Schedd: (.*)\.cern\.ch", line)
    if result is None:
        msg = f"Could not find the scheduler name in line: {line!r}"
        raise ValueError(msg)
    return result.group(1)


def _process_task_summary_line(line: str) -> HTCTaskSummary:
    """
    Extract the various information in a task summary line,
    validated and returned as an `HTCTaskSummary` object.

    Parameters
    ----------
    line : str
        The line containing the task summary information.

    Returns
    -------
    pyhdtoolkit.models.htc.HTCTaskSummary
        The task summary information as a validated
        `~.models.htc.HTCTaskSummary` object.
    """
    line_elements = line.split()
    # The date and time elements are joined by a space, as the parsing format expects
    submission = f"{line_elements[3]} {line_elements[4]}"
    return HTCTaskSummary(
        owner=line_elements[0],
        batch_name=line_elements[2],  # line_elements[1] is the 'ID:' part, we don't need it
        submitted=pendulum.from_format(
            submission, fmt="MM/D HH:mm", tz="Europe/Paris"
        ),  # Geneva timezone is Paris timezone
        done=line_elements[5],
        run=line_elements[6],
        idle=line_elements[7],
        total=line_elements[8],
        job_ids=line_elements[9],
    )


def _process_cluster_summary_line(line: str, query: str | None = None) -> BaseSummary:
    r"""
    Extract the various information in a cluster summary line,
    validated and returned as a `~.models.htc.BaseSummary`.

    Note
    ----
        When no task summary is available the caller cannot know the
        owner's name, and gives the wildcard pattern ``(\D+)`` as
        ``query`` instead. That pattern is used as is and adds a leading
        group to the match, which is why the counts are taken as the
        last seven groups. Any other value is matched literally.

    Parameters
    ----------
    line : str
        The line containing the cluster summary information.
    query : str, optional
        The name expected after 'Total for' in the line, or the
        wildcard pattern ``(\D+)``.

    Returns
    -------
    pyhdtoolkit.models.htc.BaseSummary
        The cluster summary information as a validated
        `~.models.htc.BaseSummary` object.

    Raises
    ------
    ValueError
        If the line does not have the expected summary layout.
    """
    # Only the wildcard is meant as a pattern, anything else is a literal name
    owner_pattern = query if query == r"(\D+)" else re.escape(str(query))
    result = re.search(
        rf"Total for {owner_pattern}: (\d+) jobs; (\d+) completed, "
        r"(\d+) removed, (\d+) idle, (\d+) running, (\d+) held, (\d+) suspended",
        line,
    )
    if result is None:
        msg = f"Could not parse the cluster summary for {query!r} in line: {line!r}"
        raise ValueError(msg)

    # The seven counts are always the last groups, with or without the wildcard one
    jobs, completed, removed, idle, running, held, suspended = result.groups()[-7:]
    return BaseSummary(
        jobs=jobs,
        completed=completed,
        removed=removed,
        idle=idle,
        running=running,
        held=held,
        suspended=suspended,
    )


def _default_tasks_table() -> Table:
    """
    Create the default structure for the Tasks Table, with
    the columns of ``TASK_COLUMNS_SETTINGS`` and no rows added.

    Returns
    -------
    rich.table.Table
        A `rich.table.Table` object with the default structure
        for the Tasks Table.
    """
    table = Table(width=120, box=box.SIMPLE_HEAVY)
    for header, header_col_settings in TASK_COLUMNS_SETTINGS.items():
        table.add_column(header, **header_col_settings)
    return table


def _default_cluster_table() -> Table:
    """
    Create the default structure for the Cluster Table, with
    the columns of ``CLUSTER_COLUMNS_SETTINGS`` and no rows added.

    Returns
    -------
    rich.table.Table
        A `rich.table.Table` object with the default structure
        for the Cluster Table.
    """
    table = Table(width=120, box=box.HORIZONTALS)
    for header, header_col_settings in CLUSTER_COLUMNS_SETTINGS.items():
        table.add_column(header, **header_col_settings)
    return table


# ----- Executable ----- #


@logger.catch()
def main():
    """
    Display the tasks and cluster tables in a live view, updated
    with a new ``condor_q`` query every 300 seconds until the
    program is interrupted from the keyboard.
    """

    def generate_renderable() -> Group:
        """
        .. versionadded:: 0.9.0

        Function called to update the live display, fetches data
        from htcondor, does the processing and returns a Group with
        both Panels.

        Returns
        -------
        rich.console.Group
            A `rich.console.Group` object with two `rich.panel.Panel`
            objects inside, one holding the tasks table and the other
            holding the cluster information.
        """
        condor_string = query_condor_q()
        user_tasks, cluster_info = read_condor_q(condor_string)
        # Without any task there is no owner name to show, a generic label is used
        owner = user_tasks[0].owner if user_tasks else "User"
        tasks_table = _make_tasks_table(user_tasks)
        cluster_table = _make_cluster_table(owner, cluster_info)
        return Group(
            Panel(
                tasks_table,
                title=f"Scheduler: {cluster_info.scheduler_id}.cern.ch",
                expand=False,
                border_style="scope.border",
            ),
            Panel(
                cluster_table,
                title=f"{cluster_info.scheduler_id} Statistics",
                expand=False,
                border_style="scope.border",
            ),
        )

    with Live(generate_renderable(), refresh_per_second=0.25) as live:
        live.console.log("Querying HTCondor Queue - Refreshed Every 5 Minutes\n")
        while True:
            try:
                live.update(generate_renderable())
                time.sleep(300)
            except KeyboardInterrupt:
                live.console.log("Exiting Program")
                break


if __name__ == "__main__":
    main()