#compdef perf
# ------------------------------------------------------------------------------
# Description
# -----------
#
#  Completion script for Linux performance counters 3.3 (perf.wiki.kernel.org).
#
# ------------------------------------------------------------------------------
# Authors
# -------
#
#  * Julien Nicoulaud <julien.nicoulaud@gmail.com>
#  * Shohei YOSHIDA <syohex@gmail.com>
#
# ------------------------------------------------------------------------------

_perf_events() {
  local -a events=(${(@f)$(perf list | sed -n -e 's/^  \([^ =]*\).*$/\1/p')})
  _values -s ',' 'events' $events
}

_perf_top_fields() {
  local fields=(overhead overhead_sys overhead_us overhead_children sample period)
  _values -s ',' 'fields' $fields
}

# Complete a comma-separated list of process IDs.
#
# Candidates are the basenames of the /proc entries whose name starts with a
# digit from 1 to 9.
#
# Returns 1 without offering anything when no such entry exists.
_perf_pids() {
  local -a pids

  # (N) makes the glob expand to nothing instead of raising "no matches
  # found" when /proc holds no matching entry; (:t) keeps only the basename.
  pids=(/proc/[1-9]*(N:t))

  (( $#pids )) || return 1

  _values -s ',' 'pids' $pids
}

_perf_stat_command() {
  _alternative "_command_names -e" "subcommand:command:(record report)"
}

_perf_c2c_coalesce() {
  local columns=(tid pid iaddr dso)
  _values -s ',' 'columns' $columns
}

_perf_mem_type() {
  local types=(load store)
  _values -s ',' 'type' $types
}

_perf_test_list() {
  # TODO show test detail
  local -a test_list=(${(@f)$(perf test list 2>&1 | sed -n -e 's/^ *\([^ ]*\): \(.*\)$/\1/p')})
  _values -s ',' 'test' $test_list
}

_perf_report_sort_keys() {
  local -a sort_keys=(
    "comm" "pid" "dso" "dso_size" "symbol" "symbol_size" "parent" "cpu"
    "socket" "srcline" "srcfile" "weight" "local_weight" "cgroup_id"
    "transaction" "overhead" "overhead_sys" "overhead_us" "overhead_guest_sys"
    "overhead_guest_us" "sample" "period" "time" "dso_from" "dso_to" "symbol_from"
    "symbol_to" "srcline_from" "srcline_to" "mispredict" "in_tx" "abort" "cycles"
    "symbol_daddr" "dso_daddr" "locked" "tlb" "mem" "snoop" "dcacheline"
    "phys_daddr" "trace" "trace_fields"
  )

  _values -s ',' 'keys' $sort_keys
}

_perf_branch_filter() {
  local -a filters=(
    'any:any type of branches'
    'any_call:any function call or system call'
    'any_ret:any function return or system call return'
    'ind_call:any indirect branch'
    'call:direct calls, including far (to/from kernel) calls'
    'u:only when the branch target is at the user level'
    'k:only when the branch target is in the kernel'
    'hv:only when the target is at the hypervisor level'
    'in_tx:only when the target is in a hardware transaction'
    'no_tx:only when the target is not in a hardware transaction'
    'abort_tx:only when the target is a hardware transaction abort'
    'cond:conditional branches'
    'save_type:save branch type during sampling in case binary is not available later'
  )
  _describe -t filters 'branch filter' filters
}

_perf_clock_id() {
  local clocks=(CLOCK_MONOTONIC CLOCK_MONOTONIC_RAW CLOCK_BOOTTIME CLOCK_REALTIME CLOCK_TAI)
  _values 'clocks' $clocks
}

_perf_lock_subcommand() {
  local -a subcmds=(
    'record:records lock events'
    'report:reports statistical data'
    'script:shows raw lock events'
    'info:shows metadata like threads or addresses of lock instances'
  )
  _describe -t subcmds 'lock subcommand ' subcmds
}

_perf_trace_subcommand() {
  _alternative "_command_names -e" "subcommand:command:(record)"
}

_perf_kmem_sort_keys() {
  local -a keys=(ptr callsite bytes hit pingpong frag page order migtype gfp)

  for ((i = 1; i < CURRENT; i++))
  do
    if [[ $words[$i] == "--slab" ]]; then
      keys=(ptr callsite bytes hit pingpong frag)
      break
    fi
    if [[ $words[$i] == "--page" ]]; then
      keys=(page callsite hit order migtype gfp)
      break
    fi
  done

  _values -s ',' 'key' $keys
}

_perf_sched_subcommand() {
  local -a subcmds=(
    'record:record the scheduling events'
    'latency:report the per task scheduling latencies'
    'script:see a detailed trace'
    'replay:simulate the workload'
    'map:print a textual context-switching outline'
    'timehist:provides an analysis of scheduling events'
  )
  _describe -t subcmds 'lock subcommand ' subcmds
}

_perf_script_fields() {
  local -a fields=(
    "comm" "tid" "pid" "time" "cpu" "event" "trace" "ip" "sym" "dso"
    "addr" "symoff" "srcline" "period" "iregs" "uregs" "brstack"
    "brstacksym" "flags" "bpf-output" "brstackinsn"  "brstackoff"
    "callindent" "insn" "insnlen" "synth" "phys_addr" "metric"
    "misc" "srccode" "ipc"
  )

  compset -P '(hw|sw|trace):'

  _values -s ',' 'fields' $fields
}

_perf() {
  local context curcontext="$curcontext" state line
  typeset -A opt_args

  local ret=1

  _arguments -C \
    '(- : *)--version[show version number and exit]' \
    '(- : *)--help[show help]: :_perf_cmds' \
    '1: :_perf_cmds' \
    '*::arg:->args' \
  && ret=0

  case $state in
    (args)
      curcontext="${curcontext%:*:*}:perf-cmd-$words[1]:"
      case $line[1] in
        (annotate)
          # TODO Complete CPU list
          # TODO Complete disassembler style
          _arguments \
            '(- : *)'{-a,--all}'[prints all the available commands on the standard output]' \
            '(-i --input)'{-i,--input=}'[input file name]: :_files' \
            '(-d --dsos)'{-d,--dsos=}'[only consider symbols in these dsos]:dso list' \
            '(-s --symbol)'{-s,--symbol=}'[symbol to annotate]:symbol' \
            '(-f --force)'{-f,--force}'[do not complain, do it]' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '(-D --dump-raw-trace)'{-D,--dump-raw-trace}'[dump raw trace in ASCII]' \
            '(-k --vmlinux)'{-k,--vmlinux=}'[vmlinux pathname]: :_files' \
            '(-m --modules)'{-m,--modules}'[load module symbols]' \
            '(-l --print-line)'{-l,--print-line}'[print matching source lines]' \
            '(-P --full-paths)'{-P,--full-paths}'[don'\''t shorten the displayed pathnames]' \
            '--stdio[use the stdio interface]' \
            '--tui[use the TUI interface]' \
            '(-C --cpu)'{-C,--cpu}'[only report samples for the list of CPUs provided]:CPU list' \
            '--asm-raw[show raw instruction encoding of assembly instructions]' \
            '(--no-source)--source[interleave source code with assembly code]' \
            '(--source)--no-source[don'\''t interleave source code with assembly code]' \
            '--symfs=[look for files with symbols relative to this directory]: :_files -/' \
            '(-M --disassembler-style)'{-M,--disassembler-style=}'[set disassembler style for objdump]:disassembler style' \
            '1::symbol name' \
            && ret=0
        ;;
        (archive)
          _arguments \
            '1: :_files' \
            && ret=0
        ;;
        (bench)
          _arguments \
            '(-r --repeat)'{-r,--repeat=}'[specify amount of times to repeat the run]' \
            '(-f --format)'{-f,--format=}'[specify format style]: :(default simple)' \
            '1:subsystem:(sched mem numa futex epoll all)' \
            '*:: :->bench_subsystem' \
            && ret=0
        ;;
        (buildid-cache)
          _arguments \
            '(-a --add)'{-a,--add=}'[add specified file to the cache]: :_files' \
            '(-f --force)'{-f,--force}'[do not complain, do it]' \
            '(-k --kcore)'{-k,--kcore=}'[add specified kcore file to core]:kcore:_files' \
            '(-r --remove)'{-r,--remove=}'[remove specified file from the cache]: :_files' \
            '(-p,--purge)'{-p,--purge=}'[purge all cached binaries including older caches which have specified path]: :_files' \
            '(-P,--purge-all)'{-P,--purge-all}'[purge all cached binaries]' \
            '(-M,--missing)'{-M,--missing=}'[list missing build ids in the cache for the specified file]: :_files' \
            '(-u,--update)'{-u,--update=}'[update specified file of the cache]: :_files' \
            '(-l,--list)'{-l,--list}'[list all valid binaries from cache]' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '--target-ns=[obtain mount namespace information from the target pid]: :_pids' \
            && ret=0
		;;
        (buildid-list)
          _arguments \
            '(-H --with-hits)'{-H,--with-hits}'[show only DSOs with hits]' \
            '(-i --input)'{-i,--input=}'[input file name]: :_files' \
            '(-f --force)'{-f,--force}'[don'\''t do ownership validation]' \
            '(-k --kernel)'{-k,--kernel}'[show running kernel build id]' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            && ret=0
        ;;
        (c2c)
          _arguments \
            '1:subcmd:(record report)' \
            '*:: :->c2c_args'
        ;;
        (config)
          _arguments \
            '(-l --list)'{-l,--list}'[show current config variables for all sections]' \
            '--user[read or write user config]' \
            '--system[read or write system config]' \
            && ret=0
        ;;
        (data)
          _arguments \
            '1:command:((convert))' \
            '*:: :->data_args' \
            && ret=0
        ;;
        (diff)
          _arguments \
            '(-M --displacement)'{-M,--displacement}'[show position displacement relative to baseline]' \
            '(-D --dump-raw-trace)'{-D,--dump-raw-trace}'[dump raw trace in ASCII]' \
            '(-m --modules)'{-m,--modules}'[load module symbols]' \
            '(-d --dsos)'{-d,--dsos=}'[only consider symbols in these dsos]:dso list' \
            '(-C --comms)'{-C,--comms=}'[only consider symbols in these comms]:comm list' \
            '(-S --symbols)'{-S,--symbols=}'[only consider these symbols]:symbol list' \
            '(-s --sort)'{-s,--sort=}'[sort by key(s)]: :_values -s , key pid comm dso symbol' \
            '(-t --field-separator)'{-t,--field-separator=}'[use a special separator character and don'\''t pad with spaces]:separator' \
            '(-v --verbose)'{-v,--verbose}'[be verbose, for instance, show the raw counts in addition to the diff]' \
            '(-f --force)'{-f,--force}'[don'\''t complain, do it]' \
            '--symfs=[look for files with symbols relative to this directory]: :_files -/' \
            '1:old file:_files' \
            '2:new file:_files' \
            && ret=0
        ;;
        (evlist)
          _arguments \
            '(-i --input)'{-i,--input=}'[input file name]: :_files' \
            && ret=0
        ;;
        (ftrace)
          _arguments \
            '(-t --tracer)'{-t,--tracer=}'[tracer to use]:tracer:(function_tracer function)' \
            '(-v --verbose)'{-v,--verbose=}'[verbosity level]:level:' \
            '(-p --pid)'{-p,--pid=}'[trace on existing process ID]:pids:_perf_pids' \
            '(-a --all-cpus)'{-a,--all-cpus}'[force system-wide collection]' \
            '(-C --cpu)'{-C,--cpu=}'[only trace for the list of CPUs]:cpu_list:' \
            \*{-T,--trace-funcs=}'[only trace functions given by the argument]:func:' \
            \*{-N,--notrace-funcs=}'[do not trace functions given by the argument]:func:' \
            \*{-G,--graph-funcs=}'[set graph filter on the given function]:func:' \
            \*{-g,--nograph-funcs=}'[set graph notrace filter on the given function]:func:' \
            '(-D,--graph-depth)'{-D,--graph-depth=}'[set max depth for function graph tracer]' \
            && ret=0
        ;;
        (inject)
          _arguments \
            '(-b --build-ids)'{-b,--build-ids=}'[inject build-ids into the output stream]:build-id list' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '(-i --input)'{-i,--input=}'[input file name]:input:_files' \
            '(-o --output)'{-o,--output=}'[output file name]:output:_files' \
            '(-s --sched-stat)'{-s,--sched-stat}'[merge sched_stat and sched_switch for getting events]' \
            '--kallsyms=[kallsyms file]:kallsyms:_files' \
            '--itrace=[decode instruction tracing data]' \
            '--strip[use with --itrace to strip out non-synthesized events]' \
            '(-j --jit)'{-j,--jit}'[process jitdump files]' \
            '(-f --force)'{-f,--force}'[do not complain, do it]' \
            && ret=0
        ;;
        (kallsyms)
          _arguments \
            '(-v --verbose)'{-v,--verbose=}'[increase verbosity level]' \
            && ret=0
        ;;
        (kmem)
          _arguments \
            '(-i --input)'{-i,--input=}'[input file name]: :_files' \
            '(-f --force)'{-f,--force}'[dont do ownership validation]' \
            '--caller[show per-callsite statistics]' \
            '--alloc[show per-allocation statistics]' \
            '(-s --sort)'{-s,--sort=}'[sort by output]: :_perf_kmem_sort_keys' \
            '(-n --lines)'{-n,--lines}'[print n lines only]:number' \
            '--raw-ip[print raw ip instead of symbol]' \
            '--slab[analyze slab allocator events]' \
            '--page[analyze page allocator events]' \
            '--live[show live page stat]' \
            '--time=[only analyze samples within given time window]:time_window:' \
            '1:command:((record\:record\ the\ kmem\ events\ of\ an\ arbitrary\ workload stat\:report\ kernel\ memory\ statistics))' \
            '*:: :->kmem_args' \
            && ret=0
        ;;
        (kvm)
          _arguments \
            '(-i --input)'{-i,--input=}'[input file name]: :_files' \
            '(-o --output)'{-o,--output=}'[output file name]: :_files' \
            '--host=[collect host side performance profile]:host' \
            '--guest=[collect guest side performance profile]:guest' \
            '--guestmount=[guest os root file system mount directory]: :_files -/' \
            '--guestkallsyms=[guest os /proc/kallsyms file copy]: :_files' \
            '--guestmodules=[guest os /proc/modules file copy]: :_files' \
            '--guestvmlinux=[guest os kernel vmlinux]: :_files' \
            '1:command:((top record report diff buildid-list))' \
            && ret=0
        ;;
        (list)
          _arguments \
            '1:event type:((hw\:hardware\ events hardware\:hardware\ events sw\:software\ events software\:software\ events cache\:cache\ events hwcache\:cache\ events tracepoint\:tracepoint\ events))' \
            && ret=0
        ;;
        (lock)
          _arguments \
            '(-i --input)'{-i,--input=}'[input file name]: :_files' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '(-D --dump-raw-trace)'{-D,--dump-raw-trace}'[dump raw trace in ASCII]' \
            '(-f --force)'{-f,--force}'[dont complain do it]' \
            '1:command:_perf_lock_subcommand' \
            '*:: :->lock_args' \
            && ret=0
        ;;
        (mem)
          _arguments \
            '(-i --input)'{-i,--input=}'[input file name]:file:_files' \
            '(-f --force)'{-f,--force}'[do not ownership validation]' \
            '(-t --type)'{-t,--type=}'[select the memory operation type]:type:_perf_mem_type' \
            '(-D --dump-raw-samples)'{-D,--dump-raw-trace}'[dump the raw decoded samples]' \
            '(-x --field-separator)'{-x,--field-separator=}'[specify the field separator used when raw samples]:separator:' \
            '(-C --cpu)'{-C,--cpu=}'[monitor only on the list of CPUs provided]:cpus:' \
            '(-U --hide-unresolved)'{-U,--hide-unresolved}'[only display entries resolved to a symbol]' \
            '(-p --phys-data)'{-p,--phys-data}'[record/report sample phisical addresses]' \
            '1:command:(record report)' \
            '*::arg:->mem_args' \
            && ret=0
        ;;
        (probe)
          _arguments \
            '(-k --vmlinux)'{-k,--vmlinux=}'[specify vmlinux path which has debuginfo]:path:_files' \
            '(-m --module)'{-m,--module=}'[specify module name]:modname_or_path:_files' \
            '(-s --source)'{-s,--source=}'[specify path to kernel source]:path:_files -/' \
            '(-v --verbose -q --quiet)'{-v,--verbose}'[be more verbose]' \
            '(-q --quiet -v --verbose)'{-q,--quiet}'[be quiet]' \
            \*{-a,--add=}'[define a probe event]:probe:' \
            \*{-d,--del=}'[delete probe events]:probe:' \
            '(-l --list)'{-l,--list=}'[list up current probe events]:event:' \
            '(-L --line)'{-L,--line=}'[show source code lines which can be probed]' \
            \*{-V,--vars=}'[show available local variables at given probe point]:probe:' \
            '--externs[show external defined variables]' \
            '--no-inlines[search only for non-inlined functions]' \
            \*{-F,--funcs=}'[show available functions in given module or kernel]:filter:' \
            \*{-D,--definition=}'[show trace-event definition]:probe:' \
            '*--filter=[set filter]:filter:' \
            '(-f --force)'{-f,--force}'[forcibly add events with existing name]' \
            '(-n --dry-run)'{-n,--dry-run}'[dry run]' \
            '--cache[cache the probes]' \
            '--max-probes=[set the maximum number of probe points for an event]:number:' \
            '--target-ns=[obtain mount namespace information from the target pid]:pid:_pids' \
            '(-x --exec)'{-x,--exec=}'[specify path to the executable of shared library]:path:_files' \
            '--demangle-kernel[demangle kernel symbols]' \
            && ret=0
        ;;
        (record)
          _arguments \
            \*{-e,--events=}'[select the PMU event]:event:_perf_events' \
            '*--filter=[event filter]:filter:' \
            '--exclude-perf[do not record events issued by perf itself]' \
            '(-a --all-cpus)'{-a,--all-cpus}'[system-wide collection from all CPUs]' \
            '(-p --pid)'{-p,--pid=}'[record events on existing process ID]:pids:_perf_pids' \
            '(-t --tid)'{-t,--tid=}'[record events on existing thread ID]:tids:' \
            '(-u --uid)'{-u,--uid=}'[record events in threads owned by uid]:uid:' \
            '(-r --realtime)'{-r,--realtime=}'[collect data with this RT SCHED_FIFO priority]:priority:' \
            '--no-buffering[collect data without buffering]' \
            '(-c --count)'{-c,--count=}'[event period to sample]:period:' \
            '(-o --output)'{-o,--output=}'[output file name]:out_file:_files' \
            '(-i --no-inherit)'{-i,--no-inherit}'[child tasks do not inherit counters]' \
            '(-F --freq)'{-F,--freq=}'[profile at this frequency]:freq:(max)' \
            '--string-freq[fail if the specified frequency cannot be used]' \
            '(-m --mmap-pages)'{-m,--mmap-pages=}'[number of mmap data pages]:pages:' \
            '--group[put all events in a single event group]' \
            '-g[enables call-graph recording]' \
            '--call-graph=[setup and enable call-graph recording]:method:(fp dwarf lbr)' \
            '(-q --quiet)'{-q,--quiet}'[do not print any message]' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '(-s --stat)'{-s,--stat}'[record per-thread event counts]' \
            '(-d --data)'{-d,--data}'[record the sample virtual addresses]' \
            '--phys-data[record the sample physical addresses]' \
            '(-T --timestamp)'{-T,--timestamp}'[record the sample timestamps]' \
            '(-P --period)'{-P,--period}'[record the sample period]' \
            '--sample-cpu[record the sample cpu]' \
            '(-n --no-samples)'{-n,--no-samples}'[do not sample]' \
            '(-C --cpu)'{-C,--cpu=}'[collect samples only on the list of CPUs provided]:cpus:' \
            '(-B --no-buildid)'{-B,--no-buildid}'[do not save the build ids of binaries in the perf.data files]' \
            '(-N --no-buildid-cache)'{-N,--no-buildid-cache}'[do not update the buildid cache]' \
            \*{-G,--cgroup=}'[monitor only in the container (cgroup) called "name"]:name:' \
            '(-b --branch-any)'{-b,--branch-any}'[enable taken branch stack sampling]' \
            '(-j --branch-filter)'{-j,--branch-filter=}'[specify stack sampling filter]:filter:_perf_branch_filter' \
            '--weight[enable weightened sampling]' \
            '--namespaces[record events of type PREF_RECORD_NAMESPACES]' \
            '--transaction[record transaction flags for transaction related events]' \
            '(-D --delay)'{-D,--delay=}'[wait msecs before measuring]' \
            '(-I,--intr-regs)'{-I,--intr-regs=}'[capture machine state (registers) at interrupt]:registers:' \
            '--user-regs=[capture user registers at sample time]:registers:' \
            '--running-time[record running and enabled time for read events]' \
            '(-k --clockid)'{-k,--clockid=}'[sets the clockid to use for time fields]:clock_id:_perf_clock_id' \
            '--proc-map-timeout=[time out(ms) limit of processing /proc/XXX/mmap]:msecs:' \
            '--switch-events[record context switch events]' \
            '--clang-path=[path to clang binary to use compiling BPF scripts]:clang_path:_files' \
            '--clang-opt=[option passed to clang when compiling BPF scripts]:clang_opt:' \
            '--vmlinux=[specify vmlinux path which has debuginfo]:vmlinux_path:_files' \
            '--buildid-all[record build-id of all DSOs]' \
            '--aio=[use "n" control blocks in asynchronous trace writing mode]:number:' \
            '--affinity=[set affinity mask by mode value]' \
            '--mmap-flush=[specify minimal number of bytes]:bytes:' \
            '(-z --compression-level)'{-z,--compression-level=}'[compressed level]:level:' \
            '--all-kernel[configure all used events to run in kernel space]' \
            '--all-user[configure all used events to run in user space]' \
            '(--user-callchains)--kernel-callchains[collect callchains only from kernel space]' \
            '(--kernel-callchains)--user-callchains[collect callchains only from user space]' \
            '--timestamp-filename[append timestamp to output file name]' \
            '--timestamp-boundary[record timestamp boundary]' \
            '--switch-output=[generate multiple perf.data files based on mode]:mode:' \
            '--switch-max-files=[when rotating perf.data only keep N files]:number:' \
            '--dry-run[parse options then exit]' \
            '--tail-synthesize[collect samples during finalization instead of beginning of recording]' \
            '--overwrite[makes all events use an overwritable ring buffer]' \
            '1:command:_command_names -e' \
            '*::args:_normal' \
            && ret=0
        ;;
        (report)
          _arguments \
            '(-i --input)'{-i,--input=}'[input file name]:file:_files' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '(-q --quiet)'{-q,--quiet}'[do not show any message]' \
            '(-n --show-nr-samples)'{-n,--show-nr-samples}'[show the number of samples for each symbol]' \
            '--show-cpu-utilization[show sample percentage for different cpu modes]' \
            '(-T --threads)'{-T,--threads}'[show per-thread event counters]' \
            '(-c --comms)'{-c,--comms=}'[only consider symbols in these comms]:comms:' \
            '--pid=[only show events for given process ID]:pid:_perf_pids' \
            '--tid=[only show events for given thread ID]:tid:' \
            \*{-d,--dsos=}'[only consider symbols in these dsos]:dsos:' \
            \*{-S,--symbols=}'[only consider these symbols]:symbol:' \
            '--symbol-filter=[only show symbols that match with this filter]:filter:' \
            '(-U --hide-unresolved)'{-U,--hide-unresolved}'[only display entries resolved to a symbol]' \
            '(-s --sort)'{-s,--sort=}'[sort histogram entries by given keys]:keys:_perf_report_sort_keys' \
            '(-F --fields)'{-F,--fields=}'[specify output field]:field:(overhead overhead_sys overhead_us overhead_children sample period)' \
            '(-p --parent)'{-p,--parent=}'[a regex filter to identify parent]:regex:' \
            '(-x --exclude-other)'{-x,--exclude-other}'[only display entries with parent-match]' \
            '(-w --column-widths)'{-w,--column-widths=}'[force each column width to the provided list]' \
            '(-t --field-separator)'{-t,--field-separator=}'[use a special separator character]:sep:' \
            '(-D --dump-raw-trace)'{-D,--dump-raw-trace}'[dump raw trace in ASCII]' \
            '(-g --call-graph)'{-g,--call-graph=}'[display call chains using this argument]' \
            '--children[accumulate callchain of children to parent entry]' \
            '--max-stack=[set the stack depth limit]:number:' \
            '(-G --inverted)'{-G,--inverted}'[alias for inverted caller based call graph]' \
            '--ignore-callees=[regex for ignoring callees of the function]:regex:' \
            '--pretty=[pretty printing style]:style:(normal raw)' \
            '--stdio[use the stdio interface]' \
            '--stdio-color=[color output configuring]:type:(always never auto)' \
            '--tui[use the TUI interface]' \
            '--gtk[use the GTK2 interface]' \
            '(-k --vmlinux --ignore-vmlinux)'{-k,--vmlinux=}'[vmlinux pathname]:vmlinux:_files' \
            '(-k --vmlinux)--ignore-vmlinux[ignore vmlinux files]' \
            '--kallsyms=[kallsyms pathname]:kallsyms:_files' \
            '(-m --modules)'{-m,--modules}'[load module symbols]' \
            '(-f --force)'{-f,--force}'[do not do ownership validation]' \
            '--symfs=[look for files with symbols relative to this directory]:symfs:_files -/' \
            '(-C --cpu)'{-C,--cpu=}'[only report sample for the list of CPUs provided]' \
            '(-M --disassembler-style)'{-M,--disassembler-style=}'[set disassembler style for objdump]:style:' \
            '--source[interleave source code with assembly code]' \
            '--asm-raw[show raw instruction encoding of assembly instructions]' \
            '--show-total-period[show a column with sum of periods]' \
            '(-I --show-info)'{-I,--show-info}'[display extended information about the perf.data file]' \
            '(-b --branch-stack)'{-b,--branch-stack}'[use the addressed of sampled taken branch]' \
            '--branch-history[add the addresses of sampled taken branches to the callstack]' \
            '--objdump=[path to objdump binary]:path:_files' \
            '--group[show event group information together]' \
            '--demangle[demangle symbol names to human readable form]' \
            '--demangle-kernel[demangle kernel symbol names to human readable form]' \
            '--mem-mode[use the data addresses of samples]' \
            '--percent-limit=[do not show entries which have an overhead under this percent]:number:' \
            '--percentage=[determine how to display the overhead percentage of filtered entries]:number:' \
            '--header[show header information in the perf.data file]' \
            '--header-only[show only perf.data header]' \
            '--time=[only analyze samples within given time window]:time:' \
            '--switch-on=[only consider events after this event is found]:event:' \
            '--switch-off=[stop considering events after this event is found]:event:' \
            '--show-on-off-events[show the --switch-on/off events too]' \
            '--itrace=[options for decoding instruction tracing data]' \
            '--full-source-path[show the full path for source files]' \
            '--show-ref-call-graph[show reference callgraphs]' \
            '--socket-filter[only report the samples on the processor socket]' \
            '--samples=[save N individual samples for each histogram entry]:number:' \
            '--raw-trace[do not use print fmt or plugins]' \
            '--hierarchy[enable hierarchical output]' \
            '(--no-inline)--inline[print inline stack]' \
            '(--inline)--no-inline[do not print inline stack]' \
            '--mmaps[show --tasks output plus mmap information]' \
            '--ns[show time stamps in nanoseconds]' \
            '--stat[display overall events statistics without any further processing]' \
            '--tasks[display monitored tasks stored in perf data]' \
            '--percent-type=[set annotation percent type]:type:(global-period local-period global-hits local-hits)' \
            '--time-quantum=[configure time quantum for time sort key]' \
            && ret=0
        ;;
        (sched)
          _arguments \
            '(-i --input)'{-i,--input=}'[input file name]: :_files' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '(-D --dump-raw-trace)'{-D,--dump-raw-trace}'[dump raw trace in ASCII]' \
            '(-f --force)'{-f,--force}'[do not complain, do it]' \
            '1:command:_perf_sched_subcommand' \
            '*:: :->sched_args' \
            && ret=0
        ;;
        (script)
          # TODO 'perf-script' completion
          _arguments \
            '(-D --dump-raw-trace)'{-d,--dump-raw-trace}'[display verbose dump of the trace data]' \
            '(-L --Latency)'{-L,--Latency=}'[show latency attributes]: :' \
            '(-l --list)'{-l,--list=}'[display a list of available trace scripts]: :' \
            '(-s --script)'{-s,--script=}'[process trace data with the given script]: :' \
            '(-g --gen-script)'{-g,--gen-script=}'[generate perf-script starter script for given language]:lang:' \
            '-a[force system-wide collection]' \
            '(-i --input)'{-i,--input=}'[input file name]:file:_files' \
            '(-d --debug-mode)'{-d,--debug-mode}'[do various checks]' \
            \*{-F,--fields=}'[comma separated list of fields to print]:fields:_perf_script_fields' \
            '(-k --vmlinux)'{-k,--vmlinux=}'[vmlinux pathname]:vmlinux:_files' \
            '--kallsyms=[kallsyms pathname]:kallsyms:_files' \
            '(-G --hide-call-graph)'{-G,--hide-call-graph}'[when printing symbols do not display call chain]' \
            '--stop-bt[stop display of callgraph at these symbols]' \
            '(-C --cpu)'{-C,--cpu=}'[only report samples for the list of CPUs provided]:cpus:' \
            '(-c --comms)'{-c,--comms=}'[only display events for these comms]:comms:' \
            '--pid=[only show events for given process ID]:pids:_perf_pids' \
            '--tid=[only show events for given thread ID]:tids:' \
            '(-I --show-info)'{-I,--show-info}'[display extended information about the perf.data file]' \
            '--show-kernel-path[try to resolve the path of kernel.kallsyms]' \
            '--show-task-events[display task related events]' \
            '--show-mmap-events[display mmap related events]' \
            '--show-namespace-events[display namespace events]' \
            '--show-switch-events[display context switch events]' \
            '--show-lost-events[display lost events]' \
            '--show-round-events[display finished round events]' \
            '--show-bpf-events[display bpf events]' \
            '--demangle[demangle symbol names to human readable form]' \
            '--demangle-kernel[demangle kernel symbol names to human readable form]' \
            '--header[show perf.data header]' \
            '--header-only[show only perf.data header]' \
            '--itrace=[options for decoding instruction tracing data]' \
            '--full-source-path[show the full path for source files for srcline output]' \
            '--max-stack=[set the stack depth limit]:number:' \
            '--ns[use 9 decimal places when displaying time]' \
            '(-f --force)'{-f,--force}'[do not ownership validation]' \
            '--time=[only analyze samples within given time window]' \
            '--max-blocks=[set the maximum number of program blocks to print]:number:' \
            '--reltime[print time stamps relative to trace start]' \
            '--per-event-dump[create per event files]' \
            '--inline[the inline stack will be printed]' \
            '--insn-trace[show instruction stream for intel_pt traces]' \
            '--xed[run xed disassembler on output]' \
            '--call-trace[show call stream for intel_pt traces]' \
            '--call-ret-trace[show call and return stream for intel_pt traces]' \
            '--graph-function=[for itrace only show specified functions and their callees for itrace]:functions:' \
            '--switch-on=[only consider events after this event is found]:event:' \
            '--switch-off=[stop considering events after this event is found]' \
            '--show-on-off-events[show the --switch-on/off events too]'  \
            '1:command:(record report)' \
            && ret=0
        ;;
        (stat)
          _arguments \
            \*{-e,--events=}'[select the PMU event]:event:_perf_events' \
            '(-i --no-inherit)'{-i,--no-inherit}'[child tasks do not inherit counters]' \
            '(-p --pid)'{-p,--pid=}'[stat events on existing process id]:pid:_pids' \
            '(-t --tid)'{-t,--tid=}'[stat events on existing thread id]:tid:' \
            '(-a --all-cpus)'{-a,--all-cpus}'[system-wide collection from all CPUs]' \
            '--no-scale[do not scale/normalize counter values]' \
            \*{-d,--detailed}'[print more detailed statistics]' \
            '(-r --repeat)'{-r,--repeat=}'[repeat command and print average + stddev]:count:' \
            '(-B --big-num)'{-B,--big-num}'[print large numbers with thousands separators according to locale]' \
            '(-C --cpu)'{-C,--cpu=}'[count only the list of CPUs]:cpus:' \
            '(-A --no-aggr)'{-A,--no-aggr}'[do not aggregate counts across all monitored CPUs]' \
            '(-n --null)'{-n,--null}'[null run]' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '(-x --field-separator)'{-x,--field-separator=}'[print separator]' \
            '--table[display time for each run in a table format]' \
            '(-G --cgroup)'{-G,--cgroup=}'[monitor only in the container called this name]' \
            '(-o --output)'{-o,--output=}'[print the output into this file]:file:_files' \
            '--append[append to the output file designated with -o option]' \
            '--log-fd[log output to fd, instead of stderr]' \
            '--pre[pre measurement hook]: :_command_names -e' \
            '--post[post measurement hook]: :_command_names -e' \
            '(-I --interval-print)'{-I,--interval-print=}'[print count deltas every millseconds]:msec:' \
            '--interval-count[print count delta for fixed number of times]:times:' \
            '--interval-clear[clear the screen before next interval]' \
            '--timeout[stop the perf stat session and print count after milliseconds]:msec:' \
            '--metric-only[only print compted metrics]' \
            '--per-socket[aggregate counts per processor socket for system-wide mode measurements]' \
            '--per-die[aggregate counts per processor die for sytem-wide mode measurements]' \
            '--per-core[aggregate counts per phisical processor for sytem-wide mode measurements]' \
            '--per-thread[aggregate counts per monitored threads]' \
            '(-D --delay)'{-D,--delay=}'[wait msecs before measuring]:msec:' \
            '(-T --transaction)'{-T,--transaction}'[print statistics of transactional execution]' \
            '(-)1:command:_perf_stat_command' \
            '*::arg:_normal' \
            && ret=0
          # TODO report/record sub-subcommand completion
        ;;
        (test)
          _arguments \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '(-s --skip)'{-s,--skip=}'[Tests to skip]:tests:_perf_test_list' \
            '(-F --dont-fork)'{-F,--dont-fork}'[do not fork child for each test]' \
            '1:command:((list))' \
            && ret=0
        ;;
        (timechart)
          _arguments \
            '(-i --input)'{-i,--input=}'[input file name]: :_files' \
            '(-o --output)'{-o,--output=}'[output file name]: :_files' \
            '(-w --width)'{-w,--width=}'[select the width of the SVG file]:width' \
            '(-P --power-only)'{-P,--power-only}'[only output the CPU power section of the diagram]' \
            '(-T --task-only)'{-T,--task-only}'[do not output processor state transitions]' \
            '(-p --process)'{-p,--process}'[select the processes to display, by name or PID]:process' \
            '(-f --force)'{-f,--force}'[do not complain, do it]' \
            '--symfs=[look for files with symbols relative to this directory]: :_files -/' \
            '(-n --proc-num)'{-n,--proc-num=}'[print task info for at least given number of tasks]:number:' \
            '(-t --topology)'{-t,--topology}'[sort CPUs according to topology]' \
            '--highlight=[highlight tasks using different color]:nsecs_or_task:' \
            '--io-skip-eagain[do not draw EAGAIN IO events]' \
            '--io-min-times=[draw small events as if they lasted min-time]:nsecs:' \
            '--io-merge-dist=[merge events that are merge-dist nanoseconds apart]:nsecs:' \
            '1:command:((record))' \
            '*:: :->timechart_args' \
            && ret=0
        ;;
        (top)
          _arguments \
            '(-a --all-cpus)'{-a,--all-cpus}'[system wide collection]' \
            '(-c --count)'{-c,--count=}'[event period to sample]:count:' \
            '(-C --cpu)'{-C,--cpu=}'[monitor only the list of CPUs provided]:cpu_list:' \
            '(-d --delay)'{-d,--delay=}'[number of seconds to delay between refreshes]' \
            \*{-e,--event=}'[select the PMU event]:event:_perf_events' \
            '(-E --entries)'{-E,--entries=}'[display thie many functions]' \
            '(-f --count-filter)'{-f,--count-filter=}'[only display functions with more events than this]:count:' \
            '--group[put the counters into a counter group]' \
            '(-F --freq)'{-F,--freq=}'[profile at thie frequency]:freq:(max)' \
            '(-i --inherit)'{-i,--inherit}'[child tasks do not inherit counters]' \
            '(-k --vmlinux --ignore-vmlinux)'{-k,--vmlinux=}'[path to vmlinux]:vmlinux:_files' \
            '(-k --vmlinux)--ignore-vmlinux[ignore vmlinux files]' \
            '--kallsyms=[kallsyms pathname]:sym_path:_files' \
            '(-m --mmap-pages)'{-m,--mmap-pages=}'[number of mmap data pages or size]:number:' \
            '(-p --pid)'{-p,--pid=}'[profile events on existing process ID]:pid:_pids' \
            '(-t --tid)'{-t,--tid=}'[profile events on existing thread ID]:tid:' \
            '(-u --uid)'{-u,--uid=}'[record events in threads owned by uid]:uid:' \
            '(-r --realtime)'{-r,--realtime=}'[collect data with this RT SCHED_FIFO priority]:priority:' \
            '--sym-annotate=[annotate this symbol]' \
            '(-K --hide_kernel_symbols)'{-K,--hide_kernel_symbols}'[hide kernel symbols]' \
            '(-U --hide_user_symbols)'{-K,--hide_user_symbols}'[hide user symbols]' \
            '--demangle-kernel[demangle kernel symbols]' \
            '(-D --dump-symtab)'{-D,--dump-symtab}'[dump the symbol table used for profiling]' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '(-z --zero)'{-z,--zero}'[zero history access display updates]' \
            '(-s --sort)'{-s,--sort}'[sort by keys]' \
            '--fields=[specify output field ]:fields:_perf_top_fields' \
            '(-n --show-nr-samples)'{-n,--show-nr-samples}'[show a column with the number of samples]' \
            '--show-total-period[show a column with the sum of periods]' \
            '--dsos[only consider symbols in these dsos]' \
            '--comms[only consider symbols in these comms]' \
            '--symbols[only consider these symbols]' \
            '(-M --disassembler-style)'{-M,--disassembler-style=}'[set disassembler style]:style:' \
            '(--no-source)--source[interleave source code with assembly code]' \
            '(--source)--no-source[disable interleaving source code]' \
            '--asm-raw[show raw instruction encoding of assembly instructions]' \
            '-g[Enables call-graph recording]' \
            '--call-graph[setup and enable call-graph recording]' \
            '--children[accumulate callchain of children to parent entry]' \
            '--max-stack[set the stack depth limit]' \
            '--ignore-callees=[ignore callees of functions regex]:regex:' \
            '--percent-limit[do not show entries which have an overhead under that percent]' \
            '(-w --column-widths)'{-w,--column-widths=}'[force each column width to this list]' \
            '--proc-map-timeout[set timeout limit for processing process map]' \
            '(-b --branch-any)'{-b,--branch-any}'[enable taken branch stack sampling]' \
            '(-j --branch-filter)'{-j,--branch-filter}'[enable taken branch stack sampling]' \
            '--raw-trace[do not use print fmt or plugins]' \
            '--hierarchy[enable hierarchy output]' \
            '--overwrite[overwrite records]' \
            '--force[do not do ownership validation]' \
            '--num-thread-synthesize[threads to run when synthesizing events for existing processes]'\
            '--namespaces[record events of type PREF_RECORD_NAMESPACES]' \
            '--switch-on=[only consider events after this event is found]:event:' \
            '--switch-off=[stop considering events after this event is found]:event:' \
            '--show-on-off-events[show the --switch-on/off events too]' \
            && ret=0
        ;;
        (trace)
          _arguments \
            '(-a --all-cpus)'{-a,--all-cpus}'[force system-wide collection from all cpus]' \
            '(-e --expr --event)'{-e,--expr=,--event=}'[list of syscalls and other perf events to show]::event:' \
            '(-D --delay)'{-d,--delay=}'[wait msecs before measuring]' \
            '(-o --output)'{-o,--output=}'[output file name]:output:_files' \
            '(-p --pid)'{-p,--pid=}'[record events on existing process ID]:pid:_perf_pids' \
            '(-t --tid)'{-u,--tid=}'[record events on existing thread ID]:tid:' \
            '(-u --uid)'{-u,--uid=}'[record events in threads owned by uid]:uid:' \
            \*{-G,--cgroup=}'[record events in threads in a cgroup]' \
            '--filter-pids=[filter out events for these pids and for trace itself]:pid:_perf_pids' \
            '(-v --verbose)'{-v,--verbose=}'[verbosity level]' \
            '--no-inherit[child tasks do not inherit counters]' \
            '(-m --mmap-pages)'{-m,--mmap-pages=}'[number of mmap data pages]:number:' \
            '(-C --cpu)'{-C,--cpu}'[collect samples only on the list of CPUs provides]:cpus:' \
            '--duration=[show only events that had a duration grater than N.M ms]' \
            '--sched[accrue thread runtime and provide a summary at the end of the session]' \
            '--failure[show only syscalls that failed]' \
            '(-i --input)'{-i,--input=}'[process events from a given perf data file]:file:_files' \
            '(-T --time)'{-T,--time}'[print full timestamp]' \
            '(--no-comm)--comm[show process COMM]' \
            '(--comm)--no-comm[do not show process COMM]' \
            '(-s --summary)'{-s,--summary}'[show only a summary of syscalls by thread]' \
            '(-S --with-summary)'{-S,--with-summary}'[show all syscalls followed by a summary by thread]' \
            '--tool_stats[show tool stats]' \
            '(-f --force)'{-f,--force}'[do not complain, do it]' \
            '(-F --pf)'{-F,--pf=}'[trace pagefaults]:type:(all min maj)' \
            '(--no-syscalls)--syscalls[trace system calls]' \
            '(--syscalls)--no-syscalls[not trace system calls]' \
            '--call-graph=[setup and enable call-graph recording]' \
            '--kernel-syscall-graph[show the kernel callchains on the syscall exit path]' \
            '--max-events=[stop after processing N events]:events:' \
            '--switch-on=[only consider events after this event is found]:event:' \
            '--switch-off=[stop considering events after this event is found]:event:' \
            '--show-on-off-events[show the --switch-on/off events too]' \
            '--max-stack=[set the stack depth max limit]:limit:' \
            '--min-stack=[set the stack depth min limit]:limit:' \
            '--print-sample[print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info]' \
            '--proc-map-timeout=[time out(ms) limit of processing /proc/XXX/mmap]' \
            '--sort-events[do sorting on batches of events]' \
            '--map-dump[dump BPF maps setup by events passed via -e]' \
            '1: :_perf_trace_subcommand' \
            && ret=0
            # TODO command completion
        ;;
        (version)
          _arguments \
            '--build-options[the status of compiled-in libraries are printed]' \
            && ret=0
        ;;
        (help)
          _arguments \
            '(- : *)'{-a,--all}'[prints all the available commands on the standard output]' \
            '1: :_perf_cmds' \
            && ret=0
          ;;
        *)
          _call_function ret _perf_cmd_$words[1] && ret=0
          (( ret )) && _message 'no more arguments'
        ;;
      esac
    ;;
  esac

  case $state in
    (bench_subsystem)
      case $words[1] in
        (sched)
          _arguments \
            '1:suite:(messaging pipe)' \
            '*:: :->bench_sched_suite' \
            && ret=0
        ;;
        (mem)
          _arguments \
            '1:suite:(memcpy memset)' \
            '*:: :->bench_mem_suite' \
            && ret=0
        ;;
        (numa)
          _arguments \
            '1:suite:(mem)' \
            && ret=0
        ;;
        (futex)
          _arguments \
            '1:suite:(hash wake wake-parallel requeue lock-pi)' \
            && ret=0
        ;;
        (epoll)
          _arguments \
            '1:suite:(wait ctl)' \
            && ret=0
        ;;
      esac
    ;;
    (c2c_args)
      case $words[1] in
        (record)
          _arguments \
            \*{-e,--event=}'[select the PMU event]:events:_perf_events' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '(-l --ldlat)'{-l,--ldlat=}'[configure mem-loads latency]:latency:' \
            '(-k --all-kernel)'{-k,--all-kernel}'[configure all used events to run in the kernel space]' \
            '(-u --all-user)'{-u,--all-user}'[configure all used events to run in user space]' \
            '1:command:_command_names -e' \
            '*::args:_normal' \
            && ret=0
        ;;
        (report)
          _arguments \
            '(-k --vmlinux)'{-k,--vmlinux=}'[vmlinux pathname]:vmlinux:_files' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '(-i --input)'{-i,--input=}'[specify the input file to process]:proc_file:_files' \
            '(-N --node-info)'{-N,--node-info}'[show extra node info in report]' \
            '(-c --coalesce)'{-c,--coalesce=}'[specify sorting fields for single cacheline display]:field:_perf_c2c_coalesce' \
            '(-g --call-graph)'{-g,--call-graph}'[setup callchains parameters]' \
            '--stdio[force the stdio output]' \
            '--stats[display only statistic tables]' \
            '--full-symbols[display full length of symbols]' \
            '--no-source[do not display source line column]' \
            '--show-all[show all captured HITM lines]' \
            '(-f --force)'{-f,--force}'[do not do ownership validation]' \
            '(-d --display)'{-d,--display=}'[switch to HITM type]:hitm_type:(rmt lcl)' \
            && ret=0
        ;;
      esac
    ;;
    (data_args)
      if [[ $words[1] == "convert" ]]; then
        _arguments \
          '--to-ctf=[specify the path of CTF data directory]:data_dir:_files -/' \
          '-i[specify input perf data file path]:path:_files' \
          '(-f --force)'{-f,--force}'[do not complain do it]' \
          '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
          '--all[convert all events including non-sample events to output]' \
          && ret=0
      fi
    ;;
    (kmem_args)
      if [[ $words[1] == "record" ]]; then
        _arguments \
          '1:command:_command_names -e' \
          '*::args:_normal' \
          && ret=0
      fi
    ;;
    (lock_args)
      case $words[1] in
        (record)
          _arguments \
            '1:command:_command_names -e' \
            '*::args:_normal' \
            && ret=0
        ;;
        (report)
          _arguments \
            '(-k --key)'{-k,--key=}'[sorting key]:key:(default contented avg_wait wait_total wait_max wait_min)' \
            && ret=0
        ;;
        (info)
          _arguments \
            '(-t --threads)'{-t,--threads}'[dump thread list in perf.data]' \
            '(-m --map)'{-m,--map}'[dump map of lock instances]' \
            && ret=0
        ;;
      esac
    ;;
    (mem_args)
      case $words[1] in
        (record)
          _arguments \
            \*{-e,--event=}'[event selector]:events:_perf_events' \
            '(-K --all-kernel)'{-K,--all-kernel}'[configure all used events to run in kernel space]' \
            '(-U --all-user)'{-K,--all-user}'[configure all used events to run in user space]' \
            '(-v --verbose)'{-v,--verbose}'[be more verbose]' \
            '--ldlat=[specify desired latency for loads event]:latency:' \
            '1:command:_command_names -e' \
            '*::args:_normal' \
            && ret=0
        ;;
      esac
    ;;
    (sched_args)
      case $words[1] in
        (record)
          _arguments \
          '1:command:_command_names -e' \
          '*::args:_normal' \
          && ret=0
        ;;
        (map)
          _arguments \
            '--compact[show only CPUs with activity]' \
            '--cpus[show just entries with activities]' \
            '--color-cpus=[highlight the given cpus]:cpus:' \
            '--color-pids=[highlight the given pids]:pids:' \
            && ret=0
        ;;
        (timehist)
          _arguments \
            '(-k --vmlinux)'{-k,--vmlinux=}'[vmlinux pathname]:vmlinux:_files' \
            '--kallsyms=[kallsyms pathname]:kallsyms:_files' \
            '(-g --call-graph)'{-g,--callgraph}'[display call chains]' \
            '--max-stack=[maximum number of functions to display in backtrace]:number:' \
            '(-p --pid)'{-p,--pid=}'[only show events for given process ID]:pids:_perf_pids' \
            '(-t --tid)'{-t,--tid=}'[only show events for given thread ID]:tids:' \
            '(-s --summary)'{-s,--summary}'[show only summary scheduling]' \
            '(-S --with-summary)'{-S,--with-summary}'[show all scheduling events by a summary]' \
            '--symfs=[look for files with symbols relative to this directory]:dir:_files -/' \
            '(-V --cpu-visual)'{-V,--cpu-visual}'[show visual aid for sched switches by CPU]' \
            '(-w --wakeups)'{-w,--wakeups}'[show wakeup events]' \
            '(-M --migrations)'{-M,--migrations}'[show migration events]' \
            '(-n --next)'{-n,--next}'[show next task]' \
            '(-I --idle-hist)'{-I,--idle-hist}'[show idle-related events only]' \
            '--time=[only analyze samples within given time window]:time_window:' \
            '--state[show task state when it switched out]' \
            && ret=0
        ;;
      esac
    ;;
    (timechart_args)
      if [[ $words[1] == "record" ]]; then
        _arguments \
          '(-P --power-only)'{-P,--power-only}'[record only power-related events]' \
          '(-T --tasks-only)'{-T,--tasks-only}'[record only tasks-related events]' \
          '(-I --io-only)'{-I,--io-only}'[record only io-related events]' \
          '(-g --callchain)'{-g,--callchain}'[do call-graph recording]' \
          '1:command:_command_names -e' \
          '*::args:_normal' \
          && ret=0
      fi
    ;;
  esac

  case $state in
    (bench_sched_suite)
      case $words[1] in
        (messaging)
          _arguments \
            '(-p --pipe)'{-p,--pipe}'[use pipe instead of socketpair]' \
            '(-t --thread)'{-t,--thread}'[be multi thread instead of multi process]' \
            '(-g --group)'{-g,--group=}'[specify number of groups]:number:' \
            '(-l --nr_loops)'{-l,--nr_loops=}['specify number of loops']\
            && ret=0
        ;;
        (pipe)
          _arguments \
            '(-l --loop)'{-l,--loop}'[specify number of loops]:number:' \
            && ret=0
      esac
    ;;
    (bench_mem_suite)
      # memcpy and memset support the same options
      _arguments \
        '(-l --size)'{-l,--size=}'[specify size of memory to copy]:size:' \
        '(-f --function)'{-f,--function=}'[specify function to copy]:function:' \
        '(-l --nr_loops)'{-l,--nr_loops=}'[repeat memcpy invocation this number of times]:number:' \
        '(-c --cycles)'{-c,--cycles}'[use perf cpu-cycles event instead of gettimeofday]' \
        && ret=0
    ;;
  esac
}

# FIXME Parse 'perf --help' instead of hard-coding.
(( $+functions[_perf_cmds] )) ||
_perf_cmds() {
  local commands; commands=(
    'annotate:read perf.data (created by perf record) and display annotated code'
    'archive:create archive with object files with build-ids found in perf.data file'
    'bench:general framework for benchmark suites'
    'buildid-cache:manage build-id cache'
    'buildid-list:list the buildids in a perf.data file'
    'c2c:shared data C2C/HITM analyzer'
    'config:get and set variables in a configuration file'
    'data:data file related processing'
    'diff:read two perf.data files and display the differential profile'
    'evlist:list the event names in a perf.data file'
    'ftrace:simple wrapper for kernel ftrace functionality'
    'inject:filter to augment the events stream with additional information'
    'kallsyms:searches running kernel for symbols'
    'kmem:tool to trace/measure kernel memory(slab) properties'
    'kvm:tool to trace/measure kvm guest os'
    'list:list all symbolic event types'
    'lock:analyze lock events'
    'mem:profile memory access'
    'probe:define new dynamic tracepoints'
    'record:run a command and record its profile into perf.data'
    'report:read perf.data (created by perf record) and display the profile'
    'sched:tool to trace/measure scheduler properties (latencies)'
    'script:read perf.data (created by perf record) and display trace output'
    'stat:run a command and gather performance counter statistics'
    'test:runs sanity tests'
    'timechart:tool to visualize total system behavior during a workload'
    'top:system profiling tool'
    'trace:strace inspired tool'
    'version:display the version of perf binary'
    'help:show command usage information'
  )
  _describe -t commands 'command' commands "$@"
}

# Entry point: the file is tagged "#compdef perf" on its first line, so when
# it is run for a completion request it only needs to invoke _perf (presumably
# the dispatcher function defined earlier in this file), forwarding whatever
# arguments it was called with.
_perf "$@"

# Local Variables:
# mode: Shell-Script
# sh-indentation: 2
# indent-tabs-mode: nil
# sh-basic-offset: 2
# End:
# vim: ft=zsh sw=2 ts=2 et
