Merge tag 'driver-core-3.3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh...
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 20 Mar 2012 18:16:20 +0000 (11:16 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 20 Mar 2012 18:16:20 +0000 (11:16 -0700)
Pull driver core patches for 3.4-rc1 from Greg KH:
 "Here's the big driver core merge for 3.4-rc1.

  Lots of various things here, sysfs fixes/tweaks (with the nlink
  breakage reverted), dynamic debugging updates, w1 drivers, hyperv
  driver updates, and a variety of other bits and pieces, full
  information in the shortlog."

* tag 'driver-core-3.3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core: (78 commits)
  Tools: hv: Support enumeration from all the pools
  Tools: hv: Fully support the new KVP verbs in the user level daemon
  Drivers: hv: Support the newly introduced KVP messages in the driver
  Drivers: hv: Add new message types to enhance KVP
  regulator: Support driver probe deferral
  Revert "sysfs: Kill nlink counting."
  uevent: send events in correct order according to seqnum (v3)
  driver core: minor comment formatting cleanups
  driver core: move the deferred probe pointer into the private area
  drivercore: Add driver probe deferral mechanism
  DS2781 Maxim Stand-Alone Fuel Gauge battery and w1 slave drivers
  w1_bq27000: Only one thread can access the bq27000 at a time.
  w1_bq27000 - remove w1_bq27000_write
  w1_bq27000: remove unnecessary NULL test.
  sysfs: Fix memory leak in sysfs_sd_setsecdata().
  intel_idle: Revert change of auto_demotion_disable_flags for Nehalem
  w1: Fix w1_bq27000
  driver-core: documentation: fix up Greg's email address
  powernow-k6: Really enable auto-loading
  powernow-k7: Fix CPU family number
  ...

596 files changed:
Documentation/RCU/RTFP.txt
Documentation/RCU/checklist.txt
Documentation/RCU/stallwarn.txt
Documentation/RCU/torture.txt
Documentation/RCU/trace.txt
Documentation/devicetree/bindings/gpio/led.txt
Documentation/devicetree/bindings/vendor-prefixes.txt
Documentation/hwmon/jc42
Documentation/hwmon/w83627ehf
Documentation/hwmon/zl6100
Documentation/input/alps.txt
Documentation/kernel-parameters.txt
Documentation/lockup-watchdogs.txt [new file with mode: 0644]
Documentation/nmi_watchdog.txt [deleted file]
Documentation/scheduler/sched-stats.txt
Documentation/static-keys.txt [new file with mode: 0644]
Documentation/trace/ftrace.txt
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/include/asm/futex.h
arch/alpha/kernel/perf_event.c
arch/arm/Kconfig
arch/arm/boot/.gitignore
arch/arm/include/asm/perf_event.h
arch/arm/include/asm/pmu.h
arch/arm/kernel/ecard.c
arch/arm/kernel/perf_event.c
arch/arm/kernel/perf_event_v6.c
arch/arm/kernel/perf_event_v7.c
arch/arm/kernel/perf_event_xscale.c
arch/arm/kernel/process.c
arch/arm/kernel/smp.c
arch/arm/mach-at91/at91sam9g45_devices.c
arch/arm/mach-at91/at91sam9rl_devices.c
arch/arm/mach-ep93xx/vision_ep9307.c
arch/arm/mach-exynos/mach-universal_c210.c
arch/arm/mach-omap2/id.c
arch/arm/mach-omap2/mailbox.c
arch/arm/mach-omap2/omap-iommu.c
arch/arm/mach-omap2/omap4-common.c
arch/arm/mach-omap2/twl-common.c
arch/arm/mach-pxa/generic.h
arch/arm/mach-pxa/mfp-pxa2xx.c
arch/arm/mach-pxa/pxa25x.c
arch/arm/mach-pxa/pxa27x.c
arch/arm/mach-pxa/pxa3xx.c
arch/arm/mach-pxa/pxa95x.c
arch/arm/mach-s3c2440/common.h
arch/arm/mach-s3c2440/mach-anubis.c
arch/arm/mach-s3c2440/mach-at2440evb.c
arch/arm/mach-s3c2440/mach-gta02.c
arch/arm/mach-s3c2440/mach-mini2440.c
arch/arm/mach-s3c2440/mach-nexcoder.c
arch/arm/mach-s3c2440/mach-osiris.c
arch/arm/mach-s3c2440/mach-rx1950.c
arch/arm/mach-s3c2440/mach-rx3715.c
arch/arm/mach-s3c2440/mach-smdk2440.c
arch/arm/mach-s3c2440/s3c2440.c
arch/arm/mach-s3c2440/s3c244x.c
arch/arm/mach-shmobile/board-ag5evm.c
arch/arm/mach-shmobile/board-ap4evb.c
arch/arm/mach-shmobile/board-bonito.c
arch/arm/mach-shmobile/board-mackerel.c
arch/arm/mach-ux500/Kconfig
arch/arm/mach-vexpress/Kconfig
arch/arm/mm/proc-v7.S
arch/arm/plat-omap/include/plat/irqs.h
arch/arm/plat-s3c24xx/dma.c
arch/arm/plat-samsung/devs.c
arch/arm/plat-spear/time.c
arch/avr32/kernel/process.c
arch/blackfin/kernel/process.c
arch/c6x/include/asm/processor.h
arch/c6x/kernel/entry.S
arch/cris/kernel/process.c
arch/frv/include/asm/perf_event.h
arch/frv/kernel/process.c
arch/h8300/kernel/process.c
arch/hexagon/include/asm/perf_event.h
arch/hexagon/kernel/smp.c
arch/ia64/include/asm/paravirt.h
arch/ia64/kernel/paravirt.c
arch/ia64/kernel/process.c
arch/m32r/kernel/process.c
arch/m68k/kernel/process_mm.c
arch/m68k/kernel/process_no.c
arch/microblaze/kernel/process.c
arch/mips/alchemy/common/time.c
arch/mips/ath79/dev-wmac.c
arch/mips/configs/nlm_xlp_defconfig
arch/mips/configs/nlm_xlr_defconfig
arch/mips/configs/powertv_defconfig
arch/mips/include/asm/jump_label.h
arch/mips/include/asm/mach-au1x00/gpio-au1300.h
arch/mips/include/asm/page.h
arch/mips/kernel/perf_event_mipsxx.c
arch/mips/kernel/process.c
arch/mips/kernel/smp-bmips.c
arch/mips/kernel/traps.c
arch/mips/kernel/vmlinux.lds.S
arch/mips/mm/fault.c
arch/mips/pci/pci.c
arch/mips/pmc-sierra/yosemite/ht-irq.c
arch/mips/txx9/generic/7segled.c
arch/mn10300/kernel/process.c
arch/parisc/kernel/process.c
arch/powerpc/include/asm/jump_label.h
arch/powerpc/include/asm/perf_event_server.h
arch/powerpc/kernel/idle.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/platforms/iseries/setup.c
arch/s390/include/asm/jump_label.h
arch/s390/include/asm/perf_event.h
arch/s390/kernel/irq.c
arch/s390/kernel/process.c
arch/s390/kernel/smp.c
arch/score/kernel/process.c
arch/sh/kernel/cpu/sh2a/ex.S
arch/sh/kernel/cpu/sh4a/clock-sh7757.c
arch/sh/kernel/cpu/sh4a/clock-sh7785.c
arch/sh/kernel/idle.c
arch/sh/kernel/perf_event.c
arch/sparc/Makefile
arch/sparc/include/asm/jump_label.h
arch/sparc/kernel/perf_event.c
arch/sparc/kernel/process_32.c
arch/sparc/kernel/process_64.c
arch/tile/configs/tilegx_defconfig
arch/tile/configs/tilepro_defconfig
arch/tile/kernel/compat_signal.c
arch/tile/kernel/process.c
arch/tile/kernel/signal.c
arch/tile/kernel/sysfs.c
arch/tile/lib/spinlock_32.c
arch/x86/Kconfig
arch/x86/ia32/ia32_aout.c
arch/x86/include/asm/hardirq.h
arch/x86/include/asm/inat.h
arch/x86/include/asm/insn.h
arch/x86/include/asm/jump_label.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/timer.h
arch/x86/kernel/Makefile
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event.h
arch/x86/kernel/cpu/perf_event_amd.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_ds.c
arch/x86/kernel/cpu/perf_event_intel_lbr.c
arch/x86/kernel/irq_32.c
arch/x86/kernel/kprobes-common.h [new file with mode: 0644]
arch/x86/kernel/kprobes-opt.c [new file with mode: 0644]
arch/x86/kernel/kprobes.c
arch/x86/kernel/kvm.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/time.c
arch/x86/kernel/tsc.c
arch/x86/kernel/tsc_sync.c
arch/x86/kvm/mmu_audit.c
arch/x86/lib/delay.c
arch/x86/lib/inat.c
arch/x86/lib/insn.c
arch/x86/mm/hugetlbpage.c
arch/x86/pci/acpi.c
arch/xtensa/kernel/process.c
block/blk-ioc.c
block/blk-softirq.c
block/blk.h
block/cfq-iosched.c
block/genhd.c
block/partition-generic.c
drivers/block/DAC960.c
drivers/block/floppy.c
drivers/block/sx8.c
drivers/char/ramoops.c
drivers/char/tlclk.c
drivers/char/viotape.c
drivers/clocksource/acpi_pm.c
drivers/clocksource/clksrc-dbx500-prcmu.c
drivers/clocksource/cs5535-clockevt.c
drivers/clocksource/cyclone.c
drivers/clocksource/scx200_hrt.c
drivers/cpuidle/cpuidle.c
drivers/gpu/drm/exynos/exynos_drm_connector.c
drivers/gpu/drm/exynos/exynos_drm_drv.c
drivers/gpu/drm/exynos/exynos_drm_fbdev.c
drivers/gpu/drm/exynos/exynos_drm_fimd.c
drivers/gpu/drm/gma500/cdv_device.c
drivers/gpu/drm/gma500/framebuffer.c
drivers/gpu/drm/gma500/gtt.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_sprite.c
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/r600_blit_shaders.c
drivers/gpu/drm/radeon/r600d.h
drivers/gpu/drm/radeon/radeon_connectors.c
drivers/gpu/drm/radeon/radeon_display.c
drivers/gpu/drm/radeon/radeon_encoders.c
drivers/gpu/drm/radeon/radeon_fb.c
drivers/gpu/drm/radeon/radeon_mode.h
drivers/hid/hid-ids.h
drivers/hid/hid-input.c
drivers/hid/usbhid/hid-quirks.c
drivers/hwmon/Kconfig
drivers/hwmon/jc42.c
drivers/hwmon/pmbus/pmbus_core.c
drivers/hwmon/pmbus/zl6100.c
drivers/hwmon/w83627ehf.c
drivers/i2c/algos/i2c-algo-bit.c
drivers/i2c/i2c-core.c
drivers/input/evdev.c
drivers/input/misc/twl4030-vibra.c
drivers/input/mouse/alps.c
drivers/input/tablet/Kconfig
drivers/input/tablet/wacom_wac.c
drivers/iommu/amd_iommu_init.c
drivers/iommu/intel-iommu.c
drivers/md/dm-flakey.c
drivers/md/dm-io.c
drivers/md/dm-ioctl.c
drivers/md/dm-raid.c
drivers/md/dm-thin-metadata.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/media/dvb/siano/smsdvb.c
drivers/media/video/davinci/isif.c
drivers/media/video/uvc/uvc_video.c
drivers/mfd/ab8500-core.c
drivers/mfd/mfd-core.c
drivers/mfd/s5m-core.c
drivers/mfd/tps65910.c
drivers/mfd/tps65912-core.c
drivers/mfd/wm8350-irq.c
drivers/mfd/wm8994-core.c
drivers/mfd/wm8994-regmap.c
drivers/misc/ad525x_dpot-i2c.c
drivers/misc/ad525x_dpot-spi.c
drivers/misc/apds9802als.c
drivers/misc/apds990x.c
drivers/misc/bh1770glc.c
drivers/misc/bh1780gli.c
drivers/misc/bmp085.c
drivers/misc/c2port/core.c
drivers/misc/carma/carma-fpga.c
drivers/misc/cs5535-mfgpt.c
drivers/misc/ds1682.c
drivers/misc/eeprom/at25.c
drivers/misc/eeprom/eeprom.c
drivers/misc/eeprom/eeprom_93xx46.c
drivers/misc/eeprom/max6875.c
drivers/misc/fsa9480.c
drivers/misc/hmc6352.c
drivers/misc/ics932s401.c
drivers/misc/isl29003.c
drivers/misc/isl29020.c
drivers/misc/lis3lv02d/lis3lv02d_i2c.c
drivers/misc/lis3lv02d/lis3lv02d_spi.c
drivers/misc/max8997-muic.c
drivers/misc/spear13xx_pcie_gadget.c
drivers/misc/ti-st/st_kim.c
drivers/misc/ti_dac7512.c
drivers/misc/tsl2550.c
drivers/mmc/core/core.c
drivers/mmc/core/host.c
drivers/mmc/core/mmc.c
drivers/mmc/core/sd.c
drivers/mmc/core/sdio.c
drivers/mmc/host/atmel-mci.c
drivers/mmc/host/mmci.c
drivers/mmc/host/sdhci-esdhc-imx.c
drivers/net/caif/caif_hsi.c
drivers/net/ethernet/atheros/atl1c/atl1c_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
drivers/net/ethernet/cisco/enic/enic.h
drivers/net/ethernet/cisco/enic/enic_main.c
drivers/net/ethernet/ibm/ehea/ehea_main.c
drivers/net/ethernet/mellanox/mlx4/qp.c
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c
drivers/net/ethernet/packetengines/Kconfig
drivers/net/ethernet/qlogic/qla3xxx.c
drivers/net/ethernet/realtek/r8169.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/tun.c
drivers/net/usb/asix.c
drivers/net/usb/usbnet.c
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/vmxnet3/vmxnet3_int.h
drivers/net/wimax/i2400m/netdev.c
drivers/net/wireless/ath/ath9k/ar5008_phy.c
drivers/net/wireless/ath/ath9k/ar9002_hw.c
drivers/net/wireless/ath/ath9k/hw.h
drivers/net/wireless/ath/carl9170/tx.c
drivers/net/wireless/brcm80211/brcmsmac/ampdu.c
drivers/net/wireless/iwlegacy/3945-mac.c
drivers/net/wireless/iwlegacy/3945.c
drivers/net/wireless/iwlwifi/iwl-agn-lib.c
drivers/net/wireless/iwlwifi/iwl-agn-sta.c
drivers/net/wireless/mwifiex/cfg80211.c
drivers/net/wireless/rt2x00/rt2x00dev.c
drivers/net/wireless/rt2x00/rt2x00mac.c
drivers/net/wireless/rt2x00/rt2x00queue.c
drivers/of/fdt.c
drivers/of/of_mdio.c
drivers/pci/pcie/aspm.c
drivers/platform/x86/Kconfig
drivers/platform/x86/Makefile
drivers/platform/x86/acer-wmi.c
drivers/platform/x86/amilo-rfkill.c [new file with mode: 0644]
drivers/platform/x86/fujitsu-tablet.c [new file with mode: 0644]
drivers/platform/x86/panasonic-laptop.c
drivers/pps/pps.c
drivers/rapidio/devices/tsi721.c
drivers/rapidio/devices/tsi721.h
drivers/regulator/da9052-regulator.c
drivers/regulator/tps6524x-regulator.c
drivers/regulator/tps65910-regulator.c
drivers/rtc/interface.c
drivers/rtc/rtc-r9701.c
drivers/s390/cio/qdio_main.c
drivers/scsi/sd_dif.c
drivers/spi/spi-pl022.c
drivers/target/iscsi/iscsi_target.c
drivers/target/target_core_pr.c
drivers/target/target_core_transport.c
drivers/target/tcm_fc/tfc_sess.c
drivers/tty/Kconfig
drivers/tty/serial/sh-sci.c
drivers/usb/host/ehci-fsl.c
drivers/usb/host/ehci-fsl.h
drivers/video/backlight/s6e63m0.c
fs/Kconfig
fs/afs/file.c
fs/afs/internal.h
fs/afs/rxrpc.c
fs/aio.c
fs/binfmt_aout.c
fs/block_dev.c
fs/btrfs/backref.c
fs/btrfs/reada.c
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/xattr.c
fs/dcache.c
fs/eventpoll.c
fs/exec.c
fs/inode.c
fs/namei.c
fs/nilfs2/the_nilfs.c
fs/proc/base.c
fs/udf/file.c
include/linux/amba/serial.h
include/linux/clocksource.h
include/linux/dcache.h
include/linux/ftrace.h
include/linux/ftrace_event.h
include/linux/genhd.h
include/linux/init_task.h
include/linux/interrupt.h
include/linux/iocontext.h
include/linux/jump_label.h
include/linux/kernel.h
include/linux/kmsg_dump.h
include/linux/math64.h
include/linux/memcontrol.h
include/linux/netdevice.h
include/linux/netfilter.h
include/linux/of.h
include/linux/percpu.h
include/linux/perf_event.h
include/linux/preempt.h
include/linux/printk.h
include/linux/rcupdate.h
include/linux/rcutiny.h
include/linux/rcutree.h
include/linux/sched.h
include/linux/srcu.h
include/linux/static_key.h [new file with mode: 0644]
include/linux/tcp.h
include/linux/timex.h
include/linux/tracepoint.h
include/linux/wait.h
include/linux/workqueue.h
include/net/inetpeer.h
include/net/sock.h
include/net/tcp.h
include/trace/events/power.h
include/trace/events/printk.h [new file with mode: 0644]
include/trace/events/rcu.h
include/trace/events/sched.h
include/trace/events/signal.h
init/Kconfig
init/main.c
kernel/events/core.c
kernel/events/hw_breakpoint.c
kernel/exit.c
kernel/fork.c
kernel/futex.c
kernel/hung_task.c
kernel/irq/chip.c
kernel/irq/handle.c
kernel/irq/internals.h
kernel/irq/manage.c
kernel/jump_label.c
kernel/kprobes.c
kernel/lockdep.c
kernel/mutex.c
kernel/printk.c
kernel/rcu.h
kernel/rcupdate.c
kernel/rcutiny.c
kernel/rcutiny_plugin.h
kernel/rcutorture.c
kernel/rcutree.c
kernel/rcutree.h
kernel/rcutree_plugin.h
kernel/rcutree_trace.c
kernel/sched/auto_group.c
kernel/sched/core.c
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/rt.c
kernel/sched/sched.h
kernel/sched/stats.c
kernel/signal.c
kernel/softirq.c
kernel/srcu.c
kernel/sys.c
kernel/time/ntp.c
kernel/time/tick-broadcast.c
kernel/time/tick-sched.c
kernel/time/timekeeping.c
kernel/trace/ftrace.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_entries.h
kernel/trace/trace_event_perf.c
kernel/trace/trace_events.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_export.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_output.c
kernel/trace/trace_syscalls.c
kernel/tracepoint.c
kernel/watchdog.c
kernel/workqueue.c
lib/Kconfig.debug
lib/debugobjects.c
lib/dynamic_queue_limits.c
lib/vsprintf.c
mm/huge_memory.c
mm/hugetlb.c
mm/ksm.c
mm/memcontrol.c
mm/mempolicy.c
mm/migrate.c
mm/mlock.c
mm/mmap.c
mm/mprotect.c
mm/page_cgroup.c
mm/percpu-vm.c
mm/swap.c
mm/swap_state.c
net/bridge/br_multicast.c
net/bridge/br_netfilter.c
net/bridge/br_stp.c
net/bridge/br_stp_if.c
net/bridge/netfilter/ebtables.c
net/core/dev.c
net/core/net-sysfs.c
net/core/rtnetlink.c
net/core/sock.c
net/core/sysctl_net_core.c
net/ipv4/cipso_ipv4.c
net/ipv4/inetpeer.c
net/ipv4/ip_sockglue.c
net/ipv4/route.c
net/ipv4/syncookies.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_memcontrol.c
net/ipv6/addrconf.c
net/ipv6/mcast.c
net/ipv6/route.c
net/mac80211/iface.c
net/mac80211/mesh_pathtbl.c
net/mac80211/rate.c
net/netfilter/core.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_netlink.c
net/openvswitch/actions.c
net/openvswitch/datapath.c
net/sched/sch_sfq.c
sound/pci/hda/patch_realtek.c
sound/pci/rme9652/hdspm.c
sound/soc/samsung/neo1973_wm8753.c
tools/perf/Documentation/Makefile
tools/perf/Documentation/perf-lock.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-script.txt
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/perf-top.txt
tools/perf/MANIFEST
tools/perf/Makefile
tools/perf/arch/powerpc/util/header.c
tools/perf/arch/x86/util/header.c
tools/perf/bench/bench.h
tools/perf/bench/mem-memcpy-x86-64-asm-def.h
tools/perf/bench/mem-memcpy-x86-64-asm.S
tools/perf/bench/mem-memcpy.c
tools/perf/bench/mem-memset-arch.h [new file with mode: 0644]
tools/perf/bench/mem-memset-x86-64-asm-def.h [new file with mode: 0644]
tools/perf/bench/mem-memset-x86-64-asm.S [new file with mode: 0644]
tools/perf/bench/mem-memset.c [new file with mode: 0644]
tools/perf/builtin-bench.c
tools/perf/builtin-lock.c
tools/perf/builtin-probe.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-test.c
tools/perf/builtin-top.c
tools/perf/perf.h
tools/perf/python/twatch.py
tools/perf/util/annotate.c
tools/perf/util/bitmap.c
tools/perf/util/color.c
tools/perf/util/cpumap.c
tools/perf/util/cpumap.h
tools/perf/util/ctype.c
tools/perf/util/debugfs.c
tools/perf/util/debugfs.h
tools/perf/util/event.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/include/asm/dwarf2.h
tools/perf/util/include/asm/unistd_32.h [new file with mode: 0644]
tools/perf/util/include/asm/unistd_64.h [new file with mode: 0644]
tools/perf/util/include/linux/bitmap.h
tools/perf/util/map.c
tools/perf/util/map.h
tools/perf/util/parse-events.c
tools/perf/util/probe-event.c
tools/perf/util/probe-finder.c
tools/perf/util/python-ext-sources [new file with mode: 0644]
tools/perf/util/python.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/session.h
tools/perf/util/setup.py
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/strbuf.c
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/sysfs.c [new file with mode: 0644]
tools/perf/util/sysfs.h [new file with mode: 0644]
tools/perf/util/thread_map.c
tools/perf/util/thread_map.h
tools/perf/util/top.c
tools/perf/util/top.h
tools/perf/util/trace-event-parse.c
tools/perf/util/trace-event-read.c
tools/perf/util/trace-event-scripting.c
tools/perf/util/ui/browsers/annotate.c
tools/perf/util/ui/browsers/hists.c
tools/perf/util/ui/browsers/map.c
tools/perf/util/ui/helpline.c
tools/perf/util/usage.c
tools/perf/util/util.c
tools/perf/util/util.h

index c43460d..7c1dfb1 100644 (file)
@@ -1,9 +1,10 @@
-Read the F-ing Papers!
+Read the Fscking Papers!
 
 
 This document describes RCU-related publications, and is followed by
 the corresponding bibtex entries.  A number of the publications may
-be found at http://www.rdrop.com/users/paulmck/RCU/.
+be found at http://www.rdrop.com/users/paulmck/RCU/.  For others, browsers
+and search engines will usually find what you are looking for.
 
 The first thing resembling RCU was published in 1980, when Kung and Lehman
 [Kung80] recommended use of a garbage collector to defer destruction
@@ -160,7 +161,26 @@ which Mathieu Desnoyers is now maintaining [MathieuDesnoyers2009URCU]
 [MathieuDesnoyersPhD].  TINY_RCU [PaulEMcKenney2009BloatWatchRCU] made
 its appearance, as did expedited RCU [PaulEMcKenney2009expeditedRCU].
 The problem of resizeable RCU-protected hash tables may now be on a path
-to a solution [JoshTriplett2009RPHash].
+to a solution [JoshTriplett2009RPHash].  A few academic researchers are now
+using RCU to solve their parallel problems [HariKannan2009DynamicAnalysisRCU].
+
+2010 produced a simpler preemptible-RCU implementation
+based on TREE_RCU [PaulEMcKenney2010SimpleOptRCU], lockdep-RCU
+[PaulEMcKenney2010LockdepRCU], another resizeable RCU-protected hash
+table [HerbertXu2010RCUResizeHash] (this one consuming more memory,
+but allowing arbitrary changes in hash function, as required for DoS
+avoidance in the networking code), realization of the 2009 RCU-protected
+hash table with atomic node move [JoshTriplett2010RPHash], and an update on
+the RCU API [PaulEMcKenney2010RCUAPI].
+
+2011 marked the inclusion of Nick Piggin's fully lockless dentry search
+[LinusTorvalds2011Linux2:6:38:rc1:NPigginVFS], an RCU-protected red-black
+tree using software transactional memory to protect concurrent updates
+(strange, but true!) [PhilHoward2011RCUTMRBTree], yet another variant of
+RCU-protected resizeable hash tables [Triplett:2011:RPHash], the 3.0 RCU
+trainwreck [PaulEMcKenney2011RCU3.0trainwreck], and Neil Brown's "Meet the
+Lockers" LWN article [NeilBrown2011MeetTheLockers].
+
 
 Bibtex Entries
 
@@ -173,6 +193,14 @@ Bibtex Entries
 ,volume="5"
 ,number="3"
 ,pages="354-382"
+,note="Available:
+\url{http://portal.acm.org/citation.cfm?id=320619&dl=GUIDE,}
+[Viewed December 3, 2007]"
+,annotation={
+       Use garbage collector to clean up data after everyone is done with it.
+       .
+       Oldest use of something vaguely resembling RCU that I have found.
+}
 }
 
 @techreport{Manber82
@@ -184,6 +212,31 @@ Bibtex Entries
 ,number="82-01-01"
 ,month="January"
 ,pages="28"
+,annotation={
+       .
+       Superseded by Manber84.
+       .
+       Describes concurrent AVL tree implementation.  Uses a
+       garbage-collection mechanism to handle concurrent use and deletion
+       of nodes in the tree, but lacks the summary-of-execution-history
+       concept of read-copy locking.
+       .
+       Keeps full list of processes that were active when a given
+       node was to be deleted, and waits until all such processes have
+       -terminated- before allowing this node to be reused.  This is
+       not described in great detail -- one could imagine using process
+       IDs for this if the ID space was large enough that overlapping
+       never occurred.
+       .
+       This restriction makes this algorithm unsuitable for use in
+       systems comprised of long-lived processes.  It also produces
+       completely unacceptable overhead in systems with large numbers
+       of processes.  Finally, it is specific to AVL trees.
+       .
+       Cites Kung80, so not an independent invention, but the first
+       RCU-like usage that does not rely on an automatic garbage
+       collector.
+}
 }
 
 @article{Manber84
@@ -195,6 +248,74 @@ Bibtex Entries
 ,volume="9"
 ,number="3"
 ,pages="439-455"
+,annotation={
+       Describes concurrent AVL tree implementation.  Uses a
+       garbage-collection mechanism to handle concurrent use and deletion
+       of nodes in the tree, but lacks the summary-of-execution-history
+       concept of read-copy locking.
+       .
+       Keeps full list of processes that were active when a given
+       node was to be deleted, and waits until all such processes have
+       -terminated- before allowing this node to be reused.  This is
+       not described in great detail -- one could imagine using process
+       IDs for this if the ID space was large enough that overlapping
+       never occurred.
+       .
+       This restriction makes this algorithm unsuitable for use in
+       systems comprised of long-lived processes.  It also produces
+       completely unacceptable overhead in systems with large numbers
+       of processes.  Finally, it is specific to AVL trees.
+}
+}
+
+@Conference{RichardRashid87a
+,Author="Richard Rashid and Avadis Tevanian and Michael Young and
+David Golub and Robert Baron and David Black and William Bolosky and
+Jonathan Chew"
+,Title="Machine-Independent Virtual Memory Management for Paged
+Uniprocessor and Multiprocessor Architectures"
+,Booktitle="{2\textsuperscript{nd} Symposium on Architectural Support
+for Programming Languages and Operating Systems}"
+,Publisher="Association for Computing Machinery"
+,Month="October"
+,Year="1987"
+,pages="31-39"
+,Address="Palo Alto, CA"
+,note="Available:
+\url{http://www.cse.ucsc.edu/~randal/221/rashid-machvm.pdf}
+[Viewed February 17, 2005]"
+,annotation={
+       Describes lazy TLB flush, where one waits for each CPU to pass
+       through a scheduling-clock interrupt before reusing a given range
+       of virtual address.  Does not describe how one determines that
+       all CPUs have in fact taken such an interrupt, though there is
+       no shortage of straightforward methods for accomplishing this.
+       .
+       Note that it does not make sense to just wait a fixed amount of
+       time, since a given CPU might have interrupts disabled for an
+       extended amount of time.
+}
+}
+
+@article{BarbaraLiskov1988ArgusCACM
+,author = {Barbara Liskov}
+,title = {Distributed programming in {Argus}}
+,journal = {Commun. ACM}
+,volume = {31}
+,number = {3}
+,year = {1988}
+,issn = {0001-0782}
+,pages = {300--312}
+,doi = {http://doi.acm.org/10.1145/42392.42399}
+,publisher = {ACM}
+,address = {New York, NY, USA}
+,annotation= {
+       At the top of page 307: "Conflicts with deposits and withdrawals
+       are necessary if the reported total is to be up to date.  They
+       could be avoided by having total return a sum that is slightly
+       out of date."  Relies on semantics -- approximate numerical
+       values sometimes OK.
+}
 }
 
 @techreport{Hennessy89
@@ -216,6 +337,13 @@ Bibtex Entries
 ,year="1990"
 ,number="CS-TR-2222.1"
 ,month="June"
+,annotation={
+       Concurrent access to skip lists.  Has both weak and strong search.
+       Uses concept of ``garbage queue'', but has no real way of cleaning
+       the garbage efficiently.
+       .
+       Appears to be an independent invention of an RCU-like mechanism.
+}
 }
 
 @Book{Adams91
@@ -223,20 +351,15 @@ Bibtex Entries
 ,title="Concurrent Programming, Principles, and Practices"
 ,Publisher="Benjamin Cummins"
 ,Year="1991"
+,annotation={
+       Has a few paragraphs describing ``chaotic relaxation'', a
+       numerical analysis technique that allows multiprocessors to
+       avoid synchronization overhead by using possibly-stale data.
+       .
+       Seems like this is descended from yet another independent
+       invention of RCU-like function -- but this is restricted
+       in that reclamation is not necessary.
 }
-
-@phdthesis{HMassalinPhD
-,author="H. Massalin"
-,title="Synthesis: An Efficient Implementation of Fundamental Operating
-System Services"
-,school="Columbia University"
-,address="New York, NY"
-,year="1992"
-,annotation="
-       Mondo optimizing compiler.
-       Wait-free stuff.
-       Good advice: defer work to avoid synchronization.
-"
 }
 
 @unpublished{Jacobson93
@@ -244,7 +367,13 @@ System Services"
 ,title="Avoid Read-Side Locking Via Delayed Free"
 ,year="1993"
 ,month="September"
-,note="Verbal discussion"
+,note="private communication"
+,annotation={
+       Use fixed time delay to approximate grace period.  Very simple,
+       but subject to random memory corruption under heavy load.
+       .
+       Independent invention of RCU-like mechanism.
+}
 }
 
 @Conference{AjuJohn95
@@ -256,6 +385,17 @@ System Services"
 ,Year="1995"
 ,pages="11-23"
 ,Address="New Orleans, LA"
+,note="Available:
+\url{https://www.usenix.org/publications/library/proceedings/neworl/full_papers/john.a}
+[Viewed October 1, 2010]"
+,annotation={
+       Age vnodes out of the cache, and have a fixed time set by a kernel
+       parameter.  Not clear that all races were in fact correctly handled.
+       Used a 20-minute time by default, which would most definitely not
+       be suitable during DoS attacks or virus scans.
+       .
+       Apparently independent invention of RCU-like mechanism.
+}
 }
 
 @conference{Pu95a,
@@ -301,31 +441,47 @@ Utilizing Execution History and Thread Monitoring"
 ,institution="US Patent and Trademark Office"
 ,address="Washington, DC"
 ,year="1995"
-,number="US Patent 5,442,758 (contributed under GPL)"
+,number="US Patent 5,442,758"
 ,month="August"
+,annotation={
+       Describes the parallel RCU infrastructure.  Includes NUMA aspect
+       (structure of bitmap can reflect bus structure of computer system).
+       .
+       Another independent invention of an RCU-like mechanism, but the
+       "real" RCU this time!
+}
 }
 
 @techreport{Slingwine97
 ,author="John D. Slingwine and Paul E. McKenney"
-,title="Method for maintaining data coherency using thread
-activity summaries in a multicomputer system"
+,title="Method for Maintaining Data Coherency Using Thread Activity
+Summaries in a Multicomputer System"
 ,institution="US Patent and Trademark Office"
 ,address="Washington, DC"
 ,year="1997"
-,number="US Patent 5,608,893 (contributed under GPL)"
+,number="US Patent 5,608,893"
 ,month="March"
+,pages="19"
+,annotation={
+       Describes use of RCU to synchronize data between a pair of
+       SMP/NUMA computer systems.
+}
 }
 
 @techreport{Slingwine98
 ,author="John D. Slingwine and Paul E. McKenney"
-,title="Apparatus and method for achieving reduced overhead
-mutual exclusion and maintaining coherency in a multiprocessor
-system utilizing execution history and thread monitoring"
+,title="Apparatus and Method for Achieving Reduced Overhead Mutual
+Exclusion and Maintaining Coherency in a Multiprocessor System
+Utilizing Execution History and Thread Monitoring"
 ,institution="US Patent and Trademark Office"
 ,address="Washington, DC"
 ,year="1998"
-,number="US Patent 5,727,209 (contributed under GPL)"
+,number="US Patent 5,727,209"
 ,month="March"
+,annotation={
+       Describes doing an atomic update by copying the data item and
+       then substituting it into the data structure.
+}
 }
 
 @Conference{McKenney98
@@ -337,6 +493,15 @@ Problems"
 ,Year="1998"
 ,pages="509-518"
 ,Address="Las Vegas, NV"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/rclockpdcsproof.pdf}
+[Viewed December 3, 2007]"
+,annotation={
+       Describes and analyzes RCU mechanism in DYNIX/ptx.  Describes
+       application to linked list update and log-buffer flushing.
+       Defines 'quiescent state'.  Includes both measured and analytic
+       evaluation.
+}
 }
 
 @Conference{Gamsa99
@@ -349,18 +514,76 @@ Operating System Design and Implementation}"
 ,Year="1999"
 ,pages="87-100"
 ,Address="New Orleans, LA"
+,note="Available:
+\url{http://www.usenix.org/events/osdi99/full_papers/gamsa/gamsa.pdf}
+[Viewed August 30, 2006]"
+,annotation={
+       Use of RCU-like facility in K42/Tornado.  Another independent
+       invention of RCU.
+       See especially pages 7-9 (Section 5).
+}
+}
+
+@unpublished{RustyRussell2000a
+,Author="Rusty Russell"
+,Title="Re: modular net drivers"
+,month="June"
+,year="2000"
+,day="23"
+,note="Available:
+\url{http://oss.sgi.com/projects/netdev/archive/2000-06/msg00250.html}
+[Viewed April 10, 2006]"
+,annotation={
+       Proto-RCU proposal from Phil Rumpf and Rusty Russell.
+       Yet another independent invention of RCU.
+       Outline of algorithm to unload modules...
+       .
+       Appeared on net-dev mailing list.
+}
+}
+
+@unpublished{RustyRussell2000b
+,Author="Rusty Russell"
+,Title="Re: modular net drivers"
+,month="June"
+,year="2000"
+,day="24"
+,note="Available:
+\url{http://oss.sgi.com/projects/netdev/archive/2000-06/msg00254.html}
+[Viewed April 10, 2006]"
+,annotation={
+       Proto-RCU proposal from Phil Rumpf and Rusty Russell.
+       .
+       Appeared on net-dev mailing list.
+}
+}
+
+@unpublished{McKenney01b
+,Author="Paul E. McKenney and Dipankar Sarma"
+,Title="Read-Copy Update Mutual Exclusion in {Linux}"
+,month="February"
+,year="2001"
+,note="Available:
+\url{http://lse.sourceforge.net/locking/rcu/rcupdate_doc.html}
+[Viewed October 18, 2004]"
+,annotation={
+       Prototypical Linux documentation for RCU.
+}
 }
 
 @techreport{Slingwine01
 ,author="John D. Slingwine and Paul E. McKenney"
-,title="Apparatus and method for achieving reduced overhead
-mutual exclusion and maintaining coherency in a multiprocessor
-system utilizing execution history and thread monitoring"
+,title="Apparatus and Method for Achieving Reduced Overhead Mutual
+Exclusion and Maintaining Coherency in a Multiprocessor System
+Utilizing Execution History and Thread Monitoring"
 ,institution="US Patent and Trademark Office"
 ,address="Washington, DC"
 ,year="2001"
-,number="US Patent 5,219,690 (contributed under GPL)"
+,number="US Patent 6,219,690"
 ,month="April"
+,annotation={
+       'Change in mode' aspect of RCU.  Can be thought of as a lazy barrier.
+}
 }
 
 @Conference{McKenney01a
@@ -372,14 +595,61 @@ Orran Krieger and Rusty Russell and Dipankar Sarma and Maneesh Soni"
 ,Year="2001"
 ,note="Available:
 \url{http://www.linuxsymposium.org/2001/abstracts/readcopy.php}
-\url{http://www.rdrop.com/users/paulmck/rclock/rclock_OLS.2001.05.01c.pdf}
+\url{http://www.rdrop.com/users/paulmck/RCU/rclock_OLS.2001.05.01c.pdf}
 [Viewed June 23, 2004]"
-annotation="
-Described RCU, and presented some patches implementing and using it in
-the Linux kernel.
+,annotation={
+       Described RCU, and presented some patches implementing and using
+       it in the Linux kernel.
+}
+}
+
+@unpublished{McKenney01f
+,Author="Paul E. McKenney"
+,Title="{RFC:} patch to allow lock-free traversal of lists with insertion"
+,month="October"
+,year="2001"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=100259266316456&w=2}
+[Viewed June 23, 2004]"
+,annotation="
+       Memory-barrier and Alpha thread.  100 messages, not too bad...
+"
+}
+
+@unpublished{Spraul01
+,Author="Manfred Spraul"
+,Title="Re: {RFC:} patch to allow lock-free traversal of lists with insertion"
+,month="October"
+,year="2001"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=100264675012867&w=2}
+[Viewed June 23, 2004]"
+,annotation="
+       Suggested burying memory barriers in Linux's list-manipulation
+       primitives.
 "
 }
 
+@unpublished{LinusTorvalds2001a
+,Author="Linus Torvalds"
+,Title="{Re:} {[Lse-tech]} {Re:} {RFC:} patch to allow lock-free traversal of lists with insertion"
+,month="October"
+,year="2001"
+,note="Available:
+\url{http://lkml.org/lkml/2001/10/13/105}
+[Viewed August 21, 2004]"
+}
+
+@unpublished{Blanchard02a
+,Author="Anton Blanchard"
+,Title="some RCU dcache and ratcache results"
+,month="March"
+,year="2002"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=101637107412972&w=2}
+[Viewed October 18, 2004]"
+}
+
 @Conference{Linder02a
 ,Author="Hanna Linder and Dipankar Sarma and Maneesh Soni"
 ,Title="Scalability of the Directory Entry Cache"
@@ -387,6 +657,10 @@ the Linux kernel.
 ,Month="June"
 ,Year="2002"
 ,pages="289-300"
+,annotation="
+       Measured scalability of Linux 2.4 kernel's directory-entry cache
+       (dcache), and measured some scalability enhancements.
+"
 }
 
 @Conference{McKenney02a
@@ -400,49 +674,76 @@ Andrea Arcangeli and Andi Kleen and Orran Krieger and Rusty Russell"
 ,note="Available:
 \url{http://www.linux.org.uk/~ajh/ols2002_proceedings.pdf.gz}
 [Viewed June 23, 2004]"
+,annotation="
+       Presented and compared a number of RCU implementations for the
+       Linux kernel.
+"
 }
 
-@conference{Michael02a
-,author="Maged M. Michael"
-,title="Safe Memory Reclamation for Dynamic Lock-Free Objects Using Atomic
-Reads and Writes"
-,Year="2002"
-,Month="August"
-,booktitle="{Proceedings of the 21\textsuperscript{st} Annual ACM
-Symposium on Principles of Distributed Computing}"
-,pages="21-30"
+@unpublished{Sarma02a
+,Author="Dipankar Sarma"
+,Title="specweb99: dcache scalability results"
+,month="July"
+,year="2002"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=102645767914212&w=2}
+[Viewed June 23, 2004]"
 ,annotation="
-       Each thread keeps an array of pointers to items that it is
-       currently referencing.  Sort of an inside-out garbage collection
-       mechanism, but one that requires the accessing code to explicitly
-       state its needs.  Also requires read-side memory barriers on
-       most architectures.
+       Compare fastwalk and RCU for dcache.  RCU won.
 "
 }
 
-@conference{Michael02b
-,author="Maged M. Michael"
-,title="High Performance Dynamic Lock-Free Hash Tables and List-Based Sets"
-,Year="2002"
-,Month="August"
-,booktitle="{Proceedings of the 14\textsuperscript{th} Annual ACM
-Symposium on Parallel
-Algorithms and Architecture}"
-,pages="73-82"
+@unpublished{Barbieri02
+,Author="Luca Barbieri"
+,Title="Re: {[PATCH]} Initial support for struct {vfs\_cred}"
+,month="August"
+,year="2002"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=103082050621241&w=2}
+[Viewed June 23, 2004]"
 ,annotation="
-       Like the title says...
+       Suggested RCU for vfs\_shared\_cred.
 "
 }
 
-@InProceedings{HerlihyLM02
-,author={Maurice Herlihy and Victor Luchangco and Mark Moir}
-,title="The Repeat Offender Problem: A Mechanism for Supporting Dynamic-Sized,
-Lock-Free Data Structures"
-,booktitle={Proceedings of 16\textsuperscript{th} International
-Symposium on Distributed Computing}
-,year=2002
+@unpublished{Dickins02a
+,author="Hugh Dickins"
+,title="Use RCU for System-V IPC"
+,year="2002"
+,month="October"
+,note="private communication"
+}
+
+@unpublished{Sarma02b
+,Author="Dipankar Sarma"
+,Title="Some dcache\_rcu benchmark numbers"
 ,month="October"
-,pages="339-353"
+,year="2002"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=103462075416638&w=2}
+[Viewed June 23, 2004]"
+,annotation="
+       Performance of dcache RCU on kernbench for 16x NUMA-Q and 1x,
+       2x, and 4x systems.  RCU does no harm, and helps on 16x.
+"
+}
+
+@unpublished{LinusTorvalds2003a
+,Author="Linus Torvalds"
+,Title="Re: {[PATCH]} small fixes in brlock.h"
+,month="March"
+,year="2003"
+,note="Available:
+\url{http://lkml.org/lkml/2003/3/9/205}
+[Viewed March 13, 2006]"
+,annotation="
+       Linus suggests replacing brlock with RCU and/or seqlocks:
+       .
+       'It's entirely possible that the current user could be replaced
+       by RCU and/or seqlocks, and we could get rid of brlocks entirely.'
+       .
+       Steve Hemminger responds by replacing them with RCU.
+"
 }
 
 @article{Appavoo03a
@@ -457,6 +758,20 @@ B. Rosenburg and M. Stumm and J. Xenidis"
 ,volume="42"
 ,number="1"
 ,pages="60-76"
+,annotation="
+       Use of RCU to enable hot-swapping for autonomic behavior in K42.
+"
+}
+
+@unpublished{Seigh03
+,author="Joseph W. {Seigh II}"
+,title="Read Copy Update"
+,Year="2003"
+,Month="March"
+,note="email correspondence"
+,annotation="
+       Described the relationship of the VM/XA passive serialization to RCU.
+"
 }
 
 @Conference{Arcangeli03
@@ -470,6 +785,27 @@ Dipankar Sarma"
 ,year="2003"
 ,month="June"
 ,pages="297-310"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/rcu.FREENIX.2003.06.14.pdf}
+[Viewed November 21, 2007]"
+,annotation="
+       Compared updated RCU implementations for the Linux kernel, and
+       described System V IPC use of RCU, including order-of-magnitude
+       performance improvements.
+"
+}
+
+@Conference{Soules03a
+,Author="Craig A. N. Soules and Jonathan Appavoo and Kevin Hui and
+Dilma {Da Silva} and Gregory R. Ganger and Orran Krieger and
+Michael Stumm and Robert W. Wisniewski and Marc Auslander and
+Michal Ostrowski and Bryan Rosenburg and Jimi Xenidis"
+,Title="System Support for Online Reconfiguration"
+,Booktitle="Proceedings of the 2003 USENIX Annual Technical Conference"
+,Publisher="USENIX Association"
+,year="2003"
+,month="June"
+,pages="141-154"
 }
 
 @article{McKenney03a
@@ -481,6 +817,22 @@ Dipankar Sarma"
 ,volume="1"
 ,number="114"
 ,pages="18-26"
+,note="Available:
+\url{http://www.linuxjournal.com/article/6993}
+[Viewed November 14, 2007]"
+,annotation="
+       Reader-friendly intro to RCU, with the infamous old-man-and-brat
+       cartoon.
+"
+}
+
+@unpublished{Sarma03a
+,Author="Dipankar Sarma"
+,Title="RCU low latency patches"
+,month="December"
+,year="2003"
+,note="Message ID: 20031222180114.GA2248@in.ibm.com"
+,annotation="dipankar/ct.2004.03.27/RCUll.2003.12.22.patch"
 }
 
 @techreport{Friedberg03a
@@ -489,9 +841,14 @@ Dipankar Sarma"
 ,institution="US Patent and Trademark Office"
 ,address="Washington, DC"
 ,year="2003"
-,number="US Patent 6,662,184 (contributed under GPL)"
+,number="US Patent 6,662,184"
 ,month="December"
 ,pages="112"
+,annotation="
+       Applies RCU to a wildcard-search Patricia tree in order to permit
+       synchronization-free lookup.  RCU is used to retain removed nodes
+       for a grace period before freeing them.
+"
 }
 
 @article{McKenney04a
@@ -503,6 +860,12 @@ Dipankar Sarma"
 ,volume="1"
 ,number="118"
 ,pages="38-46"
+,note="Available:
+\url{http://www.linuxjournal.com/node/7124}
+[Viewed December 26, 2010]"
+,annotation="
+       Reader friendly intro to dcache and RCU.
+"
 }
 
 @Conference{McKenney04b
@@ -514,152 +877,824 @@ Dipankar Sarma"
 ,Address="Adelaide, Australia"
 ,note="Available:
 \url{http://www.linux.org.au/conf/2004/abstracts.html#90}
-\url{http://www.rdrop.com/users/paulmck/rclock/lockperf.2004.01.17a.pdf}
+\url{http://www.rdrop.com/users/paulmck/RCU/lockperf.2004.01.17a.pdf}
 [Viewed June 23, 2004]"
+,annotation="
+       Compares performance of RCU to that of other locking primitives
+       over a number of CPUs (x86, Opteron, Itanium, and PPC).
+"
 }
 
-@phdthesis{PaulEdwardMcKenneyPhD
-,author="Paul E. McKenney"
-,title="Exploiting Deferred Destruction:
-An Analysis of Read-Copy-Update Techniques
-in Operating System Kernels"
-,school="OGI School of Science and Engineering at
-Oregon Health and Sciences University"
+@unpublished{Sarma04a
+,Author="Dipankar Sarma"
+,Title="{[PATCH]} {RCU} for low latency (experimental)"
+,month="March"
+,year="2004"
+,note="\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=108003746402892&w=2}"
+,annotation="Head of thread: dipankar/2004.03.23/rcu-low-lat.1.patch"
+}
+
+@unpublished{Sarma04b
+,Author="Dipankar Sarma"
+,Title="Re: {[PATCH]} {RCU} for low latency (experimental)"
+,month="March"
+,year="2004"
+,note="\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=108016474829546&w=2}"
+,annotation="dipankar/rcuth.2004.03.24/rcu-throttle.patch"
+}
+
+@unpublished{Spraul04a
+,Author="Manfred Spraul"
+,Title="[RFC] 0/5 rcu lock update"
+,month="May"
 ,year="2004"
 ,note="Available:
-\url{http://www.rdrop.com/users/paulmck/RCU/RCUdissertation.2004.07.14e1.pdf}
-[Viewed October 15, 2004]"
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=108546407726602&w=2}
+[Viewed June 23, 2004]"
+,annotation="
+       Hierarchical-bitmap patch for RCU infrastructure.
+"
+}
+
+@unpublished{Steiner04a
+,Author="Jack Steiner"
+,Title="Re: [Lse-tech] [RFC, PATCH] 1/5 rcu lock update:
+Add per-cpu batch counter"
+,month="May"
+,year="2004"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=108551764515332&w=2}
+[Viewed June 23, 2004]"
+,annotation={
+       RCU runs reasonably on a 512-CPU SGI using Manfred Spraul's patches,
+       which may be found at:
+       https://lkml.org/lkml/2004/5/20/49 (split vars into cachelines)
+       https://lkml.org/lkml/2004/5/22/114 (cpu_quiet() patch)
+       https://lkml.org/lkml/2004/5/25/24 (0/5)
+       https://lkml.org/lkml/2004/5/25/23 (1/5)
+               https://lkml.org/lkml/2004/5/25/265 (works for Jack)
+       https://lkml.org/lkml/2004/5/25/20 (2/5)
+       https://lkml.org/lkml/2004/5/25/22 (3/5)
+       https://lkml.org/lkml/2004/5/25/19 (4/5)
+       https://lkml.org/lkml/2004/5/25/21 (5/5)
+}
 }
 
 @Conference{Sarma04c
 ,Author="Dipankar Sarma and Paul E. McKenney"
-,Title="Making RCU Safe for Deep Sub-Millisecond Response Realtime Applications"
+,Title="Making {RCU} Safe for Deep Sub-Millisecond Response
+Realtime Applications"
 ,Booktitle="Proceedings of the 2004 USENIX Annual Technical Conference
 (FREENIX Track)"
 ,Publisher="USENIX Association"
 ,year="2004"
 ,month="June"
 ,pages="182-191"
+,annotation="
+       Describes and compares a number of modifications to the Linux RCU
+       implementation that make it friendly to realtime applications.
+"
 }
 
-@unpublished{JamesMorris04b
-,Author="James Morris"
-,Title="Recent Developments in {SELinux} Kernel Performance"
-,month="December"
+@phdthesis{PaulEdwardMcKenneyPhD
+,author="Paul E. McKenney"
+,title="Exploiting Deferred Destruction:
+An Analysis of Read-Copy-Update Techniques
+in Operating System Kernels"
+,school="OGI School of Science and Engineering at
+Oregon Health and Sciences University"
 ,year="2004"
 ,note="Available:
-\url{http://www.livejournal.com/users/james_morris/2153.html}
-[Viewed December 10, 2004]"
+\url{http://www.rdrop.com/users/paulmck/RCU/RCUdissertation.2004.07.14e1.pdf}
+[Viewed October 15, 2004]"
+,annotation="
+       Describes RCU implementations and presents design patterns
+       corresponding to common uses of RCU in several operating-system
+       kernels.
+"
 }
 
-@unpublished{PaulMcKenney05a
-,Author="Paul E. McKenney"
-,Title="{[RFC]} {RCU} and {CONFIG\_PREEMPT\_RT} progress"
-,month="May"
-,year="2005"
+@unpublished{PaulEMcKenney2004rcu:dereference
+,Author="Dipankar Sarma"
+,Title="{Re: RCU : Abstracted RCU dereferencing [5/5]}"
+,month="August"
+,year="2004"
 ,note="Available:
-\url{http://lkml.org/lkml/2005/5/9/185}
-[Viewed May 13, 2005]"
+\url{http://lkml.org/lkml/2004/8/6/237}
+[Viewed June 8, 2010]"
 ,annotation="
-       First publication of working lock-based deferred free patches
-       for the CONFIG_PREEMPT_RT environment.
+       Introduce rcu_dereference().
 "
 }
 
-@conference{PaulMcKenney05b
-,Author="Paul E. McKenney and Dipankar Sarma"
-,Title="Towards Hard Realtime Response from the Linux Kernel on SMP Hardware"
-,Booktitle="linux.conf.au 2005"
-,month="April"
-,year="2005"
-,address="Canberra, Australia"
+@unpublished{JimHouston04a
+,Author="Jim Houston"
+,Title="{[RFC\&PATCH] Alternative {RCU} implementation}"
+,month="August"
+,year="2004"
 ,note="Available:
-\url{http://www.rdrop.com/users/paulmck/RCU/realtimeRCU.2005.04.23a.pdf}
-[Viewed May 13, 2005]"
+\url{http://lkml.org/lkml/2004/8/30/87}
+[Viewed February 17, 2005]"
 ,annotation="
-       Realtime turns into making RCU yet more realtime friendly.
+       Uses active code in rcu_read_lock() and rcu_read_unlock() to
+       make RCU happen, allowing RCU to function on CPUs that do not
+       receive a scheduling-clock interrupt.
 "
 }
 
-@conference{ThomasEHart2006a
-,Author="Thomas E. Hart and Paul E. McKenney and Angela Demke Brown"
-,Title="Making Lockless Synchronization Fast: Performance Implications
-of Memory Reclamation"
-,Booktitle="20\textsuperscript{th} {IEEE} International Parallel and
-Distributed Processing Symposium"
-,month="April"
-,year="2006"
-,day="25-29"
-,address="Rhodes, Greece"
+@unpublished{TomHart04a
+,Author="Thomas E. Hart"
+,Title="Master's Thesis: Applying Lock-free Techniques to the {Linux} Kernel"
+,month="October"
+,year="2004"
+,note="Available:
+\url{http://www.cs.toronto.edu/~tomhart/masters_thesis.html}
+[Viewed October 15, 2004]"
 ,annotation="
-       Compares QSBR (AKA "classic RCU"), HPBR, EBR, and lock-free
-       reference counting.
+       Proposes comparing RCU to lock-free methods for the Linux kernel.
 "
 }
 
-@Conference{PaulEMcKenney2006b
-,Author="Paul E. McKenney and Dipankar Sarma and Ingo Molnar and
-Suparna Bhattacharya"
-,Title="Extending RCU for Realtime and Embedded Workloads"
-,Booktitle="{Ottawa Linux Symposium}"
-,Month="July"
-,Year="2006"
-,pages="v2 123-138"
+@unpublished{Vaddagiri04a
+,Author="Srivatsa Vaddagiri"
+,Title="Subject: [RFC] Use RCU for tcp\_ehash lookup"
+,month="October"
+,year="2004"
 ,note="Available:
-\url{http://www.linuxsymposium.org/2006/index_2006.php}
-\url{http://www.rdrop.com/users/paulmck/RCU/OLSrtRCU.2006.08.11a.pdf}
-[Viewed January 1, 2007]"
+\url{http://marc.theaimsgroup.com/?t=109395731700004&r=1&w=2}
+[Viewed October 18, 2004]"
 ,annotation="
-       Described how to improve the -rt implementation of realtime RCU.
+       Srivatsa's RCU patch for tcp_ehash lookup.
 "
 }
 
-@unpublished{PaulEMcKenney2006c
-,Author="Paul E. McKenney"
-,Title="Sleepable {RCU}"
+@unpublished{Thirumalai04a
+,Author="Ravikiran Thirumalai"
+,Title="Subject: [patchset] Lockfree fd lookup 0 of 5"
 ,month="October"
-,day="9"
-,year="2006"
+,year="2004"
 ,note="Available:
-\url{http://lwn.net/Articles/202847/}
-Revised:
-\url{http://www.rdrop.com/users/paulmck/RCU/srcu.2007.01.14a.pdf}
-[Viewed August 21, 2006]"
+\url{http://marc.theaimsgroup.com/?t=109144217400003&r=1&w=2}
+[Viewed October 18, 2004]"
 ,annotation="
-       LWN article introducing SRCU.
+       Ravikiran's lockfree FD patch.
+"
+}
+
+@unpublished{Thirumalai04b
+,Author="Ravikiran Thirumalai"
+,Title="Subject: Re: [patchset] Lockfree fd lookup 0 of 5"
+,month="October"
+,year="2004"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=109152521410459&w=2}
+[Viewed October 18, 2004]"
+,annotation="
+       Ravikiran's lockfree FD patch.
+"
+}
+
+@unpublished{PaulEMcKenney2004rcu:assign:pointer
+,Author="Paul E. McKenney"
+,Title="{[PATCH 1/3] RCU: \url{rcu_assign_pointer()} removal of memory barriers}"
+,month="October"
+,year="2004"
+,note="Available:
+\url{http://lkml.org/lkml/2004/10/23/241}
+[Viewed June 8, 2010]"
+,annotation="
+       Introduce rcu_assign_pointer().
+"
+}
+
+@unpublished{JamesMorris04a
+,Author="James Morris"
+,Title="{[PATCH 2/3] SELinux} scalability - convert {AVC} to {RCU}"
+,day="15"
+,month="November"
+,year="2004"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=110054979416004&w=2}
+[Viewed December 10, 2004]"
+,annotation="
+       James Morris posts Kaigai Kohei's patch to LKML.
+"
+}
+
+@unpublished{JamesMorris04b
+,Author="James Morris"
+,Title="Recent Developments in {SELinux} Kernel Performance"
+,month="December"
+,year="2004"
+,note="Available:
+\url{http://www.livejournal.com/users/james_morris/2153.html}
+[Viewed December 10, 2004]"
+,annotation="
+       RCU helps SELinux performance.  ;-)  Made LWN.
+"
+}
+
+@unpublished{PaulMcKenney2005RCUSemantics
+,Author="Paul E. McKenney and Jonathan Walpole"
+,Title="{RCU} Semantics: A First Attempt"
+,month="January"
+,year="2005"
+,day="30"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/rcu-semantics.2005.01.30a.pdf}
+[Viewed December 6, 2009]"
+,annotation="
+       Early derivation of RCU semantics.
+"
+}
+
+@unpublished{PaulMcKenney2005e
+,Author="Paul E. McKenney"
+,Title="Real-Time Preemption and {RCU}"
+,month="March"
+,year="2005"
+,day="17"
+,note="Available:
+\url{http://lkml.org/lkml/2005/3/17/199}
+[Viewed September 5, 2005]"
+,annotation="
+       First posting showing how RCU can be safely adapted for
+       preemptable RCU read side critical sections.
+"
+}
+
+@unpublished{EsbenNeilsen2005a
+,Author="Esben Nielsen"
+,Title="Re: Real-Time Preemption and {RCU}"
+,month="March"
+,year="2005"
+,day="18"
+,note="Available:
+\url{http://lkml.org/lkml/2005/3/18/122}
+[Viewed March 30, 2006]"
+,annotation="
+       Esben Nielsen suggests read-side suppression of grace-period
+       processing for crude-but-workable realtime RCU.  The downside
+       is indefinite grace periods...But this is OK for experimentation
+       and testing.
+"
+}
+
+@unpublished{TomHart05a
+,Author="Thomas E. Hart and Paul E. McKenney and Angela Demke Brown"
+,Title="Efficient Memory Reclamation is Necessary for Fast Lock-Free
+Data Structures"
+,month="March"
+,year="2005"
+,note="Available:
+\url{ftp://ftp.cs.toronto.edu/csrg-technical-reports/515/}
+[Viewed March 4, 2005]"
+,annotation="
+       Comparison of RCU, QSBR, and EBR.  RCU wins for read-mostly
+       workloads.  ;-)
+"
+}
+
+@unpublished{JonCorbet2005DeprecateSyncKernel
+,Author="Jonathan Corbet"
+,Title="API change: synchronize_kernel() deprecated"
+,month="May"
+,day="3"
+,year="2005"
+,note="Available:
+\url{http://lwn.net/Articles/134484/}
+[Viewed May 3, 2005]"
+,annotation="
+       Jon Corbet describes deprecation of synchronize_kernel()
+       in favor of synchronize_rcu() and synchronize_sched().
+"
+}
+
+@unpublished{PaulMcKenney05a
+,Author="Paul E. McKenney"
+,Title="{[RFC]} {RCU} and {CONFIG\_PREEMPT\_RT} progress"
+,month="May"
+,year="2005"
+,note="Available:
+\url{http://lkml.org/lkml/2005/5/9/185}
+[Viewed May 13, 2005]"
+,annotation="
+       First publication of working lock-based deferred free patches
+       for the CONFIG_PREEMPT_RT environment.
+"
+}
+
+@conference{PaulMcKenney05b
+,Author="Paul E. McKenney and Dipankar Sarma"
+,Title="Towards Hard Realtime Response from the {Linux} Kernel on {SMP} Hardware"
+,Booktitle="linux.conf.au 2005"
+,month="April"
+,year="2005"
+,address="Canberra, Australia"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/realtimeRCU.2005.04.23a.pdf}
+[Viewed May 13, 2005]"
+,annotation="
+       Realtime turns into making RCU yet more realtime friendly.
+       http://lca2005.linux.org.au/Papers/Paul%20McKenney/Towards%20Hard%20Realtime%20Response%20from%20the%20Linux%20Kernel/LKS.2005.04.22a.pdf
+"
+}
+
+@unpublished{PaulEMcKenneyHomePage
+,Author="Paul E. McKenney"
+,Title="{Paul} {E.} {McKenney}"
+,month="May"
+,year="2005"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/}
+[Viewed May 25, 2005]"
+,annotation="
+       Paul McKenney's home page.
+"
+}
+
+@unpublished{PaulEMcKenneyRCUPage
+,Author="Paul E. McKenney"
+,Title="Read-Copy Update {(RCU)}"
+,month="May"
+,year="2005"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU}
+[Viewed May 25, 2005]"
+,annotation="
+       Paul McKenney's RCU page.
+"
+}
+
+@unpublished{JosephSeigh2005a
+,Author="Joseph Seigh"
+,Title="{RCU}+{SMR} (hazard pointers)"
+,month="July"
+,year="2005"
+,note="Personal communication"
+,annotation="
+       Joe Seigh announcing his atomic-ptr-plus project.
+       http://sourceforge.net/projects/atomic-ptr-plus/
+"
+}
+
+@unpublished{JosephSeigh2005b
+,Author="Joseph Seigh"
+,Title="Lock-free synchronization primitives"
+,month="July"
+,day="6"
+,year="2005"
+,note="Available:
+\url{http://sourceforge.net/projects/atomic-ptr-plus/}
+[Viewed August 8, 2005]"
+,annotation="
+       Joe Seigh's atomic-ptr-plus project.
+"
+}
+
+@unpublished{PaulMcKenney2005c
+,Author="Paul E. McKenney"
+,Title="{[RFC,PATCH] RCU} and {CONFIG\_PREEMPT\_RT} sane patch"
+,month="August"
+,day="1"
+,year="2005"
+,note="Available:
+\url{http://lkml.org/lkml/2005/8/1/155}
+[Viewed March 14, 2006]"
+,annotation="
+       First operating counter-based realtime RCU patch posted to LKML.
+"
+}
+
+@unpublished{PaulMcKenney2005d
+,Author="Paul E. McKenney"
+,Title="Re: [Fwd: Re: [patch] Real-Time Preemption, -RT-2.6.13-rc4-V0.7.52-01]"
+,month="August"
+,day="8"
+,year="2005"
+,note="Available:
+\url{http://lkml.org/lkml/2005/8/8/108}
+[Viewed March 14, 2006]"
+,annotation="
+       First operating counter-based realtime RCU patch posted to LKML,
+       but fixed so that various unusual combinations of configuration
+       parameters all function properly.
+"
+}
+
+@unpublished{PaulMcKenney2005rcutorture
+,Author="Paul E. McKenney"
+,Title="{[PATCH]} {RCU} torture testing"
+,month="October"
+,day="1"
+,year="2005"
+,note="Available:
+\url{http://lkml.org/lkml/2005/10/1/70}
+[Viewed March 14, 2006]"
+,annotation="
+       First rcutorture patch.
+"
+}
+
+@conference{ThomasEHart2006a
+,Author="Thomas E. Hart and Paul E. McKenney and Angela Demke Brown"
+,Title="Making Lockless Synchronization Fast: Performance Implications
+of Memory Reclamation"
+,Booktitle="20\textsuperscript{th} {IEEE} International Parallel and
+Distributed Processing Symposium"
+,month="April"
+,year="2006"
+,day="25-29"
+,address="Rhodes, Greece"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/hart_ipdps06.pdf}
+[Viewed April 28, 2008]"
+,annotation="
+       Compares QSBR, HPBR, EBR, and lock-free reference counting.
+       http://www.cs.toronto.edu/~tomhart/perflab/ipdps06.tgz
+"
+}
+
+@unpublished{NickPiggin2006radixtree
+,Author="Nick Piggin"
+,Title="[patch 3/3] radix-tree: {RCU} lockless readside"
+,month="June"
+,day="20"
+,year="2006"
+,note="Available:
+\url{http://lkml.org/lkml/2006/6/20/238}
+[Viewed March 25, 2008]"
+,annotation="
+       RCU-protected radix tree.
+"
+}
+
+@Conference{PaulEMcKenney2006b
+,Author="Paul E. McKenney and Dipankar Sarma and Ingo Molnar and
+Suparna Bhattacharya"
+,Title="Extending {RCU} for Realtime and Embedded Workloads"
+,Booktitle="{Ottawa Linux Symposium}"
+,Month="July"
+,Year="2006"
+,pages="v2 123-138"
+,note="Available:
+\url{http://www.linuxsymposium.org/2006/view_abstract.php?content_key=184}
+\url{http://www.rdrop.com/users/paulmck/RCU/OLSrtRCU.2006.08.11a.pdf}
+[Viewed January 1, 2007]"
+,annotation="
+       Described how to improve the -rt implementation of realtime RCU.
+"
+}
+
+@unpublished{WikipediaRCU
+,Author="Paul E. McKenney and Chris Purcell and Algae and Ben Schumin and
+Gaius Cornelius and Qwertyus and Neil Conway and Sbw and Blainster and
+Canis Rufus and Zoicon5 and Anome and Hal Eisen"
+,Title="Read-Copy Update"
+,month="July"
+,day="8"
+,year="2006"
+,note="Available:
+\url{http://en.wikipedia.org/wiki/Read-copy-update}
+[Viewed August 21, 2006]"
+,annotation="
+       Wikipedia RCU page as of July 8 2006.
+"
+}
+
+@Conference{NickPiggin2006LocklessPageCache
+,Author="Nick Piggin"
+,Title="A Lockless Pagecache in Linux---Introduction, Progress, Performance"
+,Booktitle="{Ottawa Linux Symposium}"
+,Month="July"
+,Year="2006"
+,pages="v2 249-254"
+,note="Available:
+\url{http://www.linuxsymposium.org/2006/view_abstract.php?content_key=184}
+[Viewed January 11, 2009]"
+,annotation="
+       Uses RCU-protected radix tree for a lockless page cache.
+"
+}
+
+@unpublished{PaulEMcKenney2006c
+,Author="Paul E. McKenney"
+,Title="Sleepable {RCU}"
+,month="October"
+,day="9"
+,year="2006"
+,note="Available:
+\url{http://lwn.net/Articles/202847/}
+Revised:
+\url{http://www.rdrop.com/users/paulmck/RCU/srcu.2007.01.14a.pdf}
+[Viewed August 21, 2006]"
+,annotation="
+       LWN article introducing SRCU.
 "
 }
 
 @unpublished{RobertOlsson2006a
 ,Author="Robert Olsson and Stefan Nilsson"
 ,Title="{TRASH}: A dynamic {LC}-trie and hash data structure"
-,month="August"
-,day="18"
-,year="2006"
+,month="August"
+,day="18"
+,year="2006"
+,note="Available:
+\url{http://www.nada.kth.se/~snilsson/publications/TRASH/trash.pdf}
+[Viewed March 4, 2011]"
+,annotation="
+       RCU-protected dynamic trie-hash combination.
+"
+}
+
+@unpublished{ChristophHellwig2006RCU2SRCU
+,Author="Christoph Hellwig"
+,Title="Re: {[-mm PATCH 1/4]} {RCU}: split classic rcu"
+,month="September"
+,day="28"
+,year="2006"
+,note="Available:
+\url{http://lkml.org/lkml/2006/9/28/160}
+[Viewed March 27, 2008]"
+}
+
+@unpublished{PaulEMcKenneyRCUusagePage
+,Author="Paul E. McKenney"
+,Title="{RCU} {Linux} Usage"
+,month="October"
+,year="2006"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/linuxusage.html}
+[Viewed January 14, 2007]"
+,annotation="
+       Paul McKenney's RCU page showing graphs plotting Linux-kernel
+       usage of RCU.
+"
+}
+
+@unpublished{PaulEMcKenneyRCUusageRawDataPage
+,Author="Paul E. McKenney"
+,Title="Read-Copy Update {(RCU)} Usage in {Linux} Kernel"
+,month="October"
+,year="2006"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html}
+[Viewed January 14, 2007]"
+,annotation="
+       Paul McKenney's RCU page showing Linux usage of RCU in tabular
+       form, with links to corresponding cscope databases.
+"
+}
+
+@unpublished{GauthamShenoy2006RCUrwlock
+,Author="Gautham R. Shenoy"
+,Title="[PATCH 4/5] lock\_cpu\_hotplug: Redesign - Lightweight implementation of lock\_cpu\_hotplug"
+,month="October"
+,year="2006"
+,day=26
+,note="Available:
+\url{http://lkml.org/lkml/2006/10/26/73}
+[Viewed January 26, 2009]"
+,annotation="
+       RCU-based reader-writer lock that allows readers to proceed with
+       no memory barriers or atomic instruction in absence of writers.
+       If writers do show up, readers must of course wait as required by
+       the semantics of reader-writer locking.  This is a recursive
+       lock.
+"
+}
+
+@unpublished{JensAxboe2006SlowSRCU
+,Author="Jens Axboe"
+,Title="Re: [patch] cpufreq: mark \url{cpufreq_tsc()} as
+\url{core_initcall_sync}"
+,month="November"
+,year="2006"
+,day=17
+,note="Available:
+\url{http://lkml.org/lkml/2006/11/17/56}
+[Viewed May 28, 2007]"
+,annotation="
+       SRCU's grace periods are too slow for Jens, even after a
+       factor-of-three speedup.
+       Sped-up version of SRCU at http://lkml.org/lkml/2006/11/17/359.
+"
+}
+
+@unpublished{OlegNesterov2006QRCU
+,Author="Oleg Nesterov"
+,Title="Re: [patch] cpufreq: mark {\tt cpufreq\_tsc()} as
+{\tt core\_initcall\_sync}"
+,month="November"
+,year="2006"
+,day=19
+,note="Available:
+\url{http://lkml.org/lkml/2006/11/19/69}
+[Viewed May 28, 2007]"
+,annotation="
+       First cut of QRCU.  Expanded/corrected versions followed.
+       Used to be OlegNesterov2007QRCU, now time-corrected.
+"
+}
+
+@unpublished{OlegNesterov2006aQRCU
+,Author="Oleg Nesterov"
+,Title="Re: [RFC, PATCH 1/2] qrcu: {"quick"} srcu implementation"
+,month="November"
+,year="2006"
+,day=30
+,note="Available:
+\url{http://lkml.org/lkml/2006/11/29/330}
+[Viewed November 26, 2008]"
+,annotation="
+       Expanded/corrected version of QRCU.
+       Used to be OlegNesterov2007aQRCU, now time-corrected.
+"
+}
+
+@unpublished{EvgeniyPolyakov2006RCUslowdown
+,Author="Evgeniy Polyakov"
+,Title="Badness in postponing work"
+,month="December"
+,year="2006"
+,day=05
+,note="Available:
+\url{http://www.ioremap.net/node/41}
+[Viewed October 28, 2008]"
+,annotation="
+       Using RCU as a pure delay leads to a 2.5x slowdown in skbs in
+       the Linux kernel.
+"
+}
+
+@inproceedings{ChrisMatthews2006ClusteredObjectsRCU
+,author = {Matthews, Chris and Coady, Yvonne and Appavoo, Jonathan}
+,title = {Portability events: a programming model for scalable system infrastructures}
+,booktitle = {PLOS '06: Proceedings of the 3rd workshop on Programming languages and operating systems}
+,year = {2006}
+,isbn = {1-59593-577-0}
+,pages = {11}
+,location = {San Jose, California}
+,doi = {http://doi.acm.org/10.1145/1215995.1216006}
+,publisher = {ACM}
+,address = {New York, NY, USA}
+,annotation={
+       Uses K42's RCU-like functionality to manage clustered-object
+       lifetimes.
+}}
+
+@article{DilmaDaSilva2006K42
+,author = {Silva, Dilma Da and Krieger, Orran and Wisniewski, Robert W. and Waterland, Amos and Tam, David and Baumann, Andrew}
+,title = {K42: an infrastructure for operating system research}
+,journal = {SIGOPS Oper. Syst. Rev.}
+,volume = {40}
+,number = {2}
+,year = {2006}
+,issn = {0163-5980}
+,pages = {34--42}
+,doi = {http://doi.acm.org/10.1145/1131322.1131333}
+,publisher = {ACM}
+,address = {New York, NY, USA}
+,annotation={
+       Describes relationship of K42 generations to RCU.
+}}
+
+# CoreyMinyard2007list_splice_rcu
+@unpublished{CoreyMinyard2007list:splice:rcu
+,Author="Corey Minyard and Paul E. McKenney"
+,Title="{[PATCH]} add an {RCU} version of list splicing"
+,month="January"
+,year="2007"
+,day=3
+,note="Available:
+\url{http://lkml.org/lkml/2007/1/3/112}
+[Viewed May 28, 2007]"
+,annotation="
+       Patch for list_splice_rcu().
+"
+}
+
+@unpublished{PaulEMcKenney2007rcubarrier
+,Author="Paul E. McKenney"
+,Title="{RCU} and Unloadable Modules"
+,month="January"
+,day="14"
+,year="2007"
+,note="Available:
+\url{http://lwn.net/Articles/217484/}
+[Viewed November 22, 2007]"
+,annotation="
+       LWN article introducing the rcu_barrier() primitive.
+"
+}
+
+@unpublished{PeterZijlstra2007SyncBarrier
+,Author="Peter Zijlstra and Ingo Molnar"
+,Title="{[PATCH 3/7]} barrier: a scalable synchonisation barrier"
+,month="January"
+,year="2007"
+,day=28
+,note="Available:
+\url{http://lkml.org/lkml/2007/1/28/34}
+[Viewed March 27, 2008]"
+,annotation="
+       RCU-like implementation for frequent updaters and rare readers(!).
+       Subsumed into QRCU.  Maybe...
+"
+}
+
+@unpublished{PaulEMcKenney2007BoostRCU
+,Author="Paul E. McKenney"
+,Title="Priority-Boosting {RCU} Read-Side Critical Sections"
+,month="February"
+,day="5"
+,year="2007"
+,note="Available:
+\url{http://lwn.net/Articles/220677/}
+Revised:
+\url{http://www.rdrop.com/users/paulmck/RCU/RCUbooststate.2007.04.16a.pdf}
+[Viewed September 7, 2007]"
+,annotation="
+       LWN article introducing RCU priority boosting.
+"
+}
+
+@unpublished{PaulMcKenney2007QRCUpatch
+,Author="Paul E. McKenney"
+,Title="{[PATCH]} {QRCU} with lockless fastpath"
+,month="February"
+,year="2007"
+,day=24
+,note="Available:
+\url{http://lkml.org/lkml/2007/2/25/18}
+[Viewed March 27, 2008]"
+,annotation="
+       Patch for QRCU supplying lock-free fast path.
+"
+}
+
+@article{JonathanAppavoo2007K42RCU
+,author = {Appavoo, Jonathan and Silva, Dilma Da and Krieger, Orran and Auslander, Marc and Ostrowski, Michal and Rosenburg, Bryan and Waterland, Amos and Wisniewski, Robert W. and Xenidis, Jimi and Stumm, Michael and Soares, Livio}
+,title = {Experience distributing objects in an SMMP OS}
+,journal = {ACM Trans. Comput. Syst.}
+,volume = {25}
+,number = {3}
+,year = {2007}
+,issn = {0734-2071}
+,pages = {6/1--6/52}
+,doi = {http://doi.acm.org/10.1145/1275517.1275518}
+,publisher = {ACM}
+,address = {New York, NY, USA}
+,annotation={
+       Role of RCU in K42.
+}}
+
+@conference{RobertOlsson2007Trash
+,Author="Robert Olsson and Stefan Nilsson"
+,Title="{TRASH}: A dynamic {LC}-trie and hash data structure"
+,booktitle="Workshop on High Performance Switching and Routing (HPSR'07)"
+,month="May"
+,year="2007"
 ,note="Available:
-\url{http://www.nada.kth.se/~snilsson/public/papers/trash/trash.pdf}
-[Viewed February 24, 2007]"
+\url{http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4281239}
+[Viewed October 1, 2010]"
 ,annotation="
        RCU-protected dynamic trie-hash combination.
 "
 }
 
-@unpublished{ThomasEHart2007a
-,Author="Thomas E. Hart and Paul E. McKenney and Angela Demke Brown and Jonathan Walpole"
-,Title="Performance of memory reclamation for lockless synchronization"
-,journal="J. Parallel Distrib. Comput."
+@conference{PeterZijlstra2007ConcurrentPagecacheRCU
+,Author="Peter Zijlstra"
+,Title="Concurrent Pagecache"
+,Booktitle="Linux Symposium"
+,month="June"
+,year="2007"
+,address="Ottawa, Canada"
+,note="Available:
+\url{http://ols.108.redhat.com/2007/Reprints/zijlstra-Reprint.pdf}
+[Viewed April 14, 2008]"
+,annotation="
+       Page-cache modifications permitting RCU readers and concurrent
+       updates.
+"
+}
+
+@unpublished{PaulEMcKenney2007whatisRCU
+,Author="Paul E. McKenney"
+,Title="What is {RCU}?"
 ,year="2007"
-,note="To appear in J. Parallel Distrib. Comput.
-       \url{doi=10.1016/j.jpdc.2007.04.010}"
+,month="07"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/whatisRCU.html}
+[Viewed July 6, 2007]"
 ,annotation={
-       Compares QSBR (AKA "classic RCU"), HPBR, EBR, and lock-free
-       reference counting.  Journal version of ThomasEHart2006a.
+       Describes RCU in Linux kernel.
 }
 }
 
 @unpublished{PaulEMcKenney2007QRCUspin
 ,Author="Paul E. McKenney"
-,Title="Using Promela and Spin to verify parallel algorithms"
+,Title="Using {Promela} and {Spin} to verify parallel algorithms"
 ,month="August"
 ,day="1"
 ,year="2007"
@@ -669,6 +1704,50 @@ Revised:
 ,annotation="
        LWN article describing Promela and spin, and also using Oleg
        Nesterov's QRCU as an example (with Paul McKenney's fastpath).
+       Merged patch at: http://lkml.org/lkml/2007/2/25/18
+"
+}
+
+@unpublished{PaulEMcKenney2007WG21DDOatomics
+,Author="Paul E. McKenney and Hans-J. Boehm and Lawrence Crowl"
+,Title="C++ Data-Dependency Ordering: Atomics and Memory Model"
+,month="August"
+,day="3"
+,year="2007"
+,note="Preprint:
+\url{http://open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2664.htm}
+[Viewed December 7, 2009]"
+,annotation="
+       RCU for C++, parts 1 and 2.
+"
+}
+
+@unpublished{PaulEMcKenney2007WG21DDOannotation
+,Author="Paul E. McKenney and Lawrence Crowl"
+,Title="C++ Data-Dependency Ordering: Function Annotation"
+,month="September"
+,day="18"
+,year="2008"
+,note="Preprint:
+\url{http://open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2782.htm}
+[Viewed December 7, 2009]"
+,annotation="
+       RCU for C++, part 2, updated many times.
+"
+}
+
+@unpublished{PaulEMcKenney2007PreemptibleRCUPatch
+,Author="Paul E. McKenney"
+,Title="[PATCH RFC 0/9] {RCU}: Preemptible {RCU}"
+,month="September"
+,day="10"
+,year="2007"
+,note="Available:
+\url{http://lkml.org/lkml/2007/9/10/213}
+[Viewed October 25, 2007]"
+,annotation="
+       Final patch for preemptable RCU to -rt.  (Later patches were
+       to mainline, eventually incorporated.)
 "
 }
 
@@ -686,10 +1765,46 @@ Revised:
 "
 }
 
+@article{ThomasEHart2007a
+,Author="Thomas E. Hart and Paul E. McKenney and Angela Demke Brown and Jonathan Walpole"
+,Title="Performance of memory reclamation for lockless synchronization"
+,journal="J. Parallel Distrib. Comput."
+,volume={67}
+,number="12"
+,year="2007"
+,issn="0743-7315"
+,pages="1270--1285"
+,doi="http://dx.doi.org/10.1016/j.jpdc.2007.04.010"
+,publisher="Academic Press, Inc."
+,address="Orlando, FL, USA"
+,annotation={
+       Compares QSBR, HPBR, EBR, and lock-free reference counting.
+       Journal version of ThomasEHart2006a.
+}
+}
+
+@unpublished{MathieuDesnoyers2007call:rcu:schedNeeded
+,Author="Mathieu Desnoyers"
+,Title="Re: [patch 1/2] {Linux} Kernel Markers - Support Multiple Probes"
+,month="December"
+,day="20"
+,year="2007"
+,note="Available:
+\url{http://lkml.org/lkml/2007/12/20/244}
+[Viewed March 27, 2008]"
+,annotation="
+       Request for call_rcu_sched() and rcu_barrier_sched().
+"
+}
+
+
 ########################################################################
 #
 #      "What is RCU?" LWN series.
 #
+#      http://lwn.net/Articles/262464/ (What is RCU, Fundamentally?)
+#      http://lwn.net/Articles/263130/ (What is RCU's Usage?)
+#      http://lwn.net/Articles/264090/ (What is RCU's API?)
 
 @unpublished{PaulEMcKenney2007WhatIsRCUFundamentally
 ,Author="Paul E. McKenney and Jonathan Walpole"
@@ -723,7 +1838,7 @@ Revised:
        3. RCU is a Bulk Reference-Counting Mechanism
        4. RCU is a Poor Man's Garbage Collector
        5. RCU is a Way of Providing Existence Guarantees
-       6. RCU is a Way of Waiting for Things to Finish 
+       6. RCU is a Way of Waiting for Things to Finish
 "
 }
 
@@ -747,20 +1862,96 @@ Revised:
 #
 ########################################################################
 
+
+@unpublished{SteveRostedt2008dyntickRCUpatch
+,Author="Steven Rostedt and Paul E. McKenney"
+,Title="{[PATCH]} add support for dynamic ticks and preempt rcu"
+,month="January"
+,day="29"
+,year="2008"
+,note="Available:
+\url{http://lkml.org/lkml/2008/1/29/208}
+[Viewed March 27, 2008]"
+,annotation="
+       Patch that prevents preemptible RCU from unnecessarily waking
+       up dynticks-idle CPUs.
+"
+}
+
+@unpublished{PaulEMcKenney2008LKMLDependencyOrdering
+,Author="Paul E. McKenney"
+,Title="Re: [PATCH 02/22 -v7] Add basic support for gcc profiler instrumentation"
+,month="February"
+,day="1"
+,year="2008"
+,note="Available:
+\url{http://lkml.org/lkml/2008/2/2/255}
+[Viewed October 18, 2008]"
+,annotation="
+       Explanation of compilers violating dependency ordering.
+"
+}
+
+@Conference{PaulEMcKenney2008Beijing
+,Author="Paul E. McKenney"
+,Title="Introducing Technology Into {Linux} Or:
+Introducing your technology Into {Linux} will require introducing a
+lot of {Linux} into your technology!!!"
+,Booktitle="2008 Linux Developer Symposium - China"
+,Publisher="OSS China"
+,Month="February"
+,Year="2008"
+,Address="Beijing, China"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/TechIntroLinux.2008.02.19a.pdf}
+[Viewed August 12, 2008]"
+}
+
+@unpublished{PaulEMcKenney2008dynticksRCU
+,Author="Paul E. McKenney and Steven Rostedt"
+,Title="Integrating and Validating dynticks and Preemptable RCU"
+,month="April"
+,day="24"
+,year="2008"
+,note="Available:
+\url{http://lwn.net/Articles/279077/}
+[Viewed April 24, 2008]"
+,annotation="
+       Describes use of Promela and Spin to validate (and fix!) the
+       dynticks/RCU interface.
+"
+}
+
 @article{DinakarGuniguntala2008IBMSysJ
 ,author="D. Guniguntala and P. E. McKenney and J. Triplett and J. Walpole"
 ,title="The read-copy-update mechanism for supporting real-time applications on shared-memory multiprocessor systems with {Linux}"
 ,Year="2008"
-,Month="April"
+,Month="April-June"
 ,journal="IBM Systems Journal"
 ,volume="47"
 ,number="2"
-,pages="@@-@@"
+,pages="221-236"
 ,annotation="
        RCU, realtime RCU, sleepable RCU, performance.
 "
 }
 
+@unpublished{LaiJiangshan2008NewClassicAlgorithm
+,Author="Lai Jiangshan"
+,Title="[{RFC}][{PATCH}] rcu classic: new algorithm for callbacks-processing"
+,month="June"
+,day="3"
+,year="2008"
+,note="Available:
+\url{http://lkml.org/lkml/2008/6/2/539}
+[Viewed December 10, 2008]"
+,annotation="
+       Updated RCU classic algorithm.  Introduced multi-tailed list
+       for RCU callbacks and also pulling common code into
+       __call_rcu().
+"
+}
+
 @article{PaulEMcKenney2008RCUOSR
 ,author="Paul E. McKenney and Jonathan Walpole"
 ,title="Introducing technology into the {Linux} kernel: a case study"
@@ -778,6 +1969,52 @@ Revised:
 }
 }
 
+@unpublished{ManfredSpraul2008StateMachineRCU
+,Author="Manfred Spraul"
+,Title="[{RFC}, {PATCH}] state machine based rcu"
+,month="August"
+,day="21"
+,year="2008"
+,note="Available:
+\url{http://lkml.org/lkml/2008/8/21/336}
+[Viewed December 8, 2008]"
+,annotation="
+       State-based RCU.  One key thing that this patch does is to
+       separate the dynticks handling of NMIs and IRQs.
+"
+}
+
+@unpublished{ManfredSpraul2008dyntickIRQNMI
+,Author="Manfred Spraul"
+,Title="Re: [{RFC}, {PATCH}] v4 scalable classic {RCU} implementation"
+,month="September"
+,day="6"
+,year="2008"
+,note="Available:
+\url{http://lkml.org/lkml/2008/9/6/86}
+[Viewed December 8, 2008]"
+,annotation="
+       Manfred notes a fix required to my attempt to separate irq
+       and NMI processing for hierarchical RCU's dynticks interface.
+"
+}
+
+@techreport{PaulEMcKenney2008cyclicRCU
+,author="Paul E. McKenney"
+,title="Efficient Support of Consistent Cyclic Search With Read-Copy Update"
+,institution="US Patent and Trademark Office"
+,address="Washington, DC"
+,year="2008"
+,number="US Patent 7,426,511"
+,month="September"
+,pages="23"
+,annotation="
+       Maintains an additional level of indirection to allow
+       readers to confine themselves to the desired snapshot of the
+       data structure.  Only permits one update at a time.
+"
+}
+
 @unpublished{PaulEMcKenney2008HierarchicalRCU
 ,Author="Paul E. McKenney"
 ,Title="Hierarchical {RCU}"
@@ -793,6 +2030,21 @@ Revised:
 "
 }
 
+@unpublished{PaulEMcKenney2009BloatwatchRCU
+,Author="Paul E. McKenney"
+,Title="Re: [PATCH fyi] RCU: the bloatwatch edition"
+,month="January"
+,day="14"
+,year="2009"
+,note="Available:
+\url{http://lkml.org/lkml/2009/1/14/449}
+[Viewed January 15, 2009]"
+,annotation="
+       Small-footprint implementation of RCU for uniprocessor
+       embedded applications -- and also for exposition purposes.
+"
+}
+
 @conference{PaulEMcKenney2009MaliciousURCU
 ,Author="Paul E. McKenney"
 ,Title="Using a Malicious User-Level {RCU} to Torture {RCU}-Based Algorithms"
@@ -816,15 +2068,17 @@ Revised:
 ,year="2009"
 ,note="Available:
 \url{http://lkml.org/lkml/2009/2/5/572}
-\url{git://lttng.org/userspace-rcu.git}
+\url{http://lttng.org/urcu}
 [Viewed February 20, 2009]"
 ,annotation="
        Mathieu Desnoyers's user-space RCU implementation.
        git://lttng.org/userspace-rcu.git
+       http://lttng.org/cgi-bin/gitweb.cgi?p=userspace-rcu.git
+       http://lttng.org/urcu
 "
 }
 
-@unpublished{PaulEMcKenney2009BloatWatchRCU
+@unpublished{PaulEMcKenney2009LWNBloatWatchRCU
 ,Author="Paul E. McKenney"
 ,Title="{RCU}: The {Bloatwatch} Edition"
 ,month="March"
@@ -852,14 +2106,29 @@ Revised:
 "
 }
 
-@unpublished{JoshTriplett2009RPHash
+@unpublished{PaulEMcKenney2009fastRTRCU
+,Author="Paul E. McKenney"
+,Title="[{PATCH} {RFC} -tip 0/4] {RCU} cleanups and simplified preemptable {RCU}"
+,month="July"
+,day="23"
+,year="2009"
+,note="Available:
+\url{http://lkml.org/lkml/2009/7/23/294}
+[Viewed August 15, 2009]"
+,annotation="
+       First posting of simple and fast preemptable RCU.
+"
+}
+
+@InProceedings{JoshTriplett2009RPHash
 ,Author="Josh Triplett"
 ,Title="Scalable concurrent hash tables via relativistic programming"
 ,month="September"
 ,year="2009"
-,note="Linux Plumbers Conference presentation"
+,booktitle="Linux Plumbers Conference 2009"
 ,annotation="
        RP fun with hash tables.
+       See also JoshTriplett2010RPHash
 "
 }
 
@@ -872,4 +2141,323 @@ Revised:
 ,note="Available:
 \url{http://www.lttng.org/pub/thesis/desnoyers-dissertation-2009-12.pdf}
 [Viewed December 9, 2009]"
+,annotation={
+       Chapter 6 (page 97) covers user-level RCU.
+}
+}
+
+@unpublished{RelativisticProgrammingWiki
+,Author="Josh Triplett and Paul E. McKenney and Jonathan Walpole"
+,Title="Relativistic Programming"
+,month="September"
+,year="2009"
+,note="Available:
+\url{http://wiki.cs.pdx.edu/rp/}
+[Viewed December 9, 2009]"
+,annotation="
+       Main Relativistic Programming Wiki.
+"
+}
+
+@conference{PaulEMcKenney2009DeterministicRCU
+,Author="Paul E. McKenney"
+,Title="Deterministic Synchronization in Multicore Systems: the Role of {RCU}"
+,Booktitle="Eleventh Real Time Linux Workshop"
+,month="September"
+,year="2009"
+,address="Dresden, Germany"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/realtime/paper/DetSyncRCU.2009.08.18a.pdf}
+[Viewed January 14, 2009]"
+}
+
+@unpublished{PaulEMcKenney2009HuntingHeisenbugs
+,Author="Paul E. McKenney"
+,Title="Hunting Heisenbugs"
+,month="November"
+,year="2009"
+,day="1"
+,note="Available:
+\url{http://paulmck.livejournal.com/14639.html}
+[Viewed June 4, 2010]"
+,annotation="
+       Day-one bug in Tree RCU that took forever to track down.
+"
+}
+
+@unpublished{MathieuDesnoyers2009defer:rcu
+,Author="Mathieu Desnoyers"
+,Title="Kernel RCU: shrink the size of the struct rcu\_head"
+,month="December"
+,year="2009"
+,note="Available:
+\url{http://lkml.org/lkml/2009/10/18/129}
+[Viewed December 29, 2009]"
+,annotation="
+       Mathieu proposed defer_rcu() with fixed-size per-thread pool
+       of RCU callbacks.
+"
+}
+
+@unpublished{MathieuDesnoyers2009VerifPrePub
+,Author="Mathieu Desnoyers and Paul E. McKenney and Michel R. Dagenais"
+,Title="Multi-Core Systems Modeling for Formal Verification of Parallel Algorithms"
+,month="December"
+,year="2009"
+,note="Submitted to IEEE TPDS"
+,annotation="
+       OOMem model for Mathieu's user-level RCU mechanical proof of
+       correctness.
+"
+}
+
+@unpublished{MathieuDesnoyers2009URCUPrePub
+,Author="Mathieu Desnoyers and Paul E. McKenney and Alan Stern and Michel R. Dagenais and Jonathan Walpole"
+,Title="User-Level Implementations of Read-Copy Update"
+,month="December"
+,year="2010"
+,url="http://www.computer.org/csdl/trans/td/2012/02/ttd2012020375-abs.html"
+,annotation="
+       RCU overview, desiderata, semi-formal semantics, user-level RCU
+       usage scenarios, three classes of RCU implementation, wait-free
+       RCU updates, RCU grace-period batching, update overhead,
+       http://www.rdrop.com/users/paulmck/RCU/urcu-main-accepted.2011.08.30a.pdf
+       http://www.rdrop.com/users/paulmck/RCU/urcu-supp-accepted.2011.08.30a.pdf
+       Superseded by MathieuDesnoyers2012URCU.
+"
+}
+
+@inproceedings{HariKannan2009DynamicAnalysisRCU
+,author = {Kannan, Hari}
+,title = {Ordering decoupled metadata accesses in multiprocessors}
+,booktitle = {MICRO 42: Proceedings of the 42nd Annual IEEE/ACM International Symposium on Microarchitecture}
+,year = {2009}
+,isbn = {978-1-60558-798-1}
+,pages = {381--390}
+,location = {New York, New York}
+,doi = {http://doi.acm.org/10.1145/1669112.1669161}
+,publisher = {ACM}
+,address = {New York, NY, USA}
+,annotation={
+       Uses RCU to protect metadata used in dynamic analysis.
+}}
+
+@conference{PaulEMcKenney2010SimpleOptRCU
+,Author="Paul E. McKenney"
+,Title="Simplicity Through Optimization"
+,Booktitle="linux.conf.au 2010"
+,month="January"
+,year="2010"
+,address="Wellington, New Zealand"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/SimplicityThruOptimization.2010.01.21f.pdf}
+[Viewed October 10, 2010]"
+,annotation="
+       TREE_PREEMPT_RCU optimizations greatly simplified the old
+       PREEMPT_RCU implementation.
+"
+}
+
+@unpublished{PaulEMcKenney2010LockdepRCU
+,Author="Paul E. McKenney"
+,Title="Lockdep-{RCU}"
+,month="February"
+,year="2010"
+,day="1"
+,note="Available:
+\url{https://lwn.net/Articles/371986/}
+[Viewed June 4, 2010]"
+,annotation="
+       CONFIG_PROVE_RCU, or at least an early version.
+"
+}
+
+@unpublished{AviKivity2010KVM2RCU
+,Author="Avi Kivity"
+,Title="[{PATCH} 37/40] {KVM}: Bump maximum vcpu count to 64"
+,month="February"
+,year="2010"
+,note="Available:
+\url{http://www.mail-archive.com/kvm@vger.kernel.org/msg28640.html}
+[Viewed March 20, 2010]"
+,annotation="
+       Use of RCU permits KVM to increase the size of guest OSes from
+       16 CPUs to 64 CPUs.
+"
+}
+
+@unpublished{HerbertXu2010RCUResizeHash
+,Author="Herbert Xu"
+,Title="bridge: Add core IGMP snooping support"
+,month="February"
+,year="2010"
+,note="Available:
+\url{http://kerneltrap.com/mailarchive/linux-netdev/2010/2/26/6270589}
+[Viewed March 20, 2011]"
+,annotation={
+       Use a pair of list_head structures to support RCU-protected
+       resizable hash tables.
+}}
+
+@article{JoshTriplett2010RPHash
+,author="Josh Triplett and Paul E. McKenney and Jonathan Walpole"
+,title="Scalable Concurrent Hash Tables via Relativistic Programming"
+,journal="ACM Operating Systems Review"
+,year=2010
+,volume=44
+,number=3
+,month="July"
+,annotation={
+       RP fun with hash tables.
+       http://portal.acm.org/citation.cfm?id=1842733.1842750
+}}
+
+@unpublished{PaulEMcKenney2010RCUAPI
+,Author="Paul E. McKenney"
+,Title="The {RCU} {API}, 2010 Edition"
+,month="December"
+,day="8"
+,year="2010"
+,note="Available:
+\url{http://lwn.net/Articles/418853/}
+[Viewed December 8, 2010]"
+,annotation="
+       Includes updated software-engineering features.
+"
+}
+
+@mastersthesis{AndrejPodzimek2010masters
+,author="Andrej Podzimek"
+,title="Read-Copy-Update for OpenSolaris"
+,school="Charles University in Prague"
+,year="2010"
+,note="Available:
+\url{https://andrej.podzimek.org/thesis.pdf}
+[Viewed January 31, 2011]"
+,annotation={
+       Reviews RCU implementations and creates a few for OpenSolaris.
+       Drives quiescent-state detection from RCU read-side primitives,
+       in a manner roughly similar to that of Jim Houston.
+}}
+
+@unpublished{LinusTorvalds2011Linux2:6:38:rc1:NPigginVFS
+,Author="Linus Torvalds"
+,Title="Linux 2.6.38-rc1"
+,month="January"
+,year="2011"
+,note="Available:
+\url{https://lkml.org/lkml/2011/1/18/322}
+[Viewed March 4, 2011]"
+,annotation={
+       "The RCU-based name lookup is at the other end of the spectrum - the
+       absolute anti-gimmick. It's some seriously good stuff, and gets rid of
+       the last main global lock that really tends to hurt some kernel loads.
+       The dentry lock is no longer a big serializing issue. What's really
+       nice about it is that it actually improves performance a lot even for
+       single-threaded loads (on an SMP kernel), because it gets rid of some
+       of the most expensive parts of path component lookup, which was the
+       d_lock on every component lookup. So I'm seeing improvements of 30-50%
+       on some seriously pathname-lookup intensive loads."
+}}
+
+@techreport{JoshTriplett2011RPScalableCorrectOrdering
+,author = {Josh Triplett and Philip W. Howard and Paul E. McKenney and Jonathan Walpole}
+,title = {Scalable Correct Memory Ordering via Relativistic Programming}
+,year = {2011}
+,number = {11-03}
+,institution = {Portland State University}
+,note = {\url{http://www.cs.pdx.edu/pdfs/tr1103.pdf}}
+}
+
+@inproceedings{PhilHoward2011RCUTMRBTree
+,author = {Philip W. Howard and Jonathan Walpole}
+,title = {A Relativistic Enhancement to Software Transactional Memory}
+,booktitle = {Proceedings of the 3rd USENIX conference on Hot topics in parallelism}
+,series = {HotPar'11}
+,year = {2011}
+,location = {Berkeley, CA}
+,pages = {1--6}
+,numpages = {6}
+,url = {http://www.usenix.org/event/hotpar11/tech/final_files/Howard.pdf}
+,publisher = {USENIX Association}
+,address = {Berkeley, CA, USA}
+}
+
+@techreport{PaulEMcKenney2011cyclicparallelRCU
+,author="Paul E. McKenney and Jonathan Walpole"
+,title="Efficient Support of Consistent Cyclic Search With Read-Copy Update and Parallel Updates"
+,institution="US Patent and Trademark Office"
+,address="Washington, DC"
+,year="2011"
+,number="US Patent 7,953,778"
+,month="May"
+,pages="34"
+,annotation="
+       Maintains an array of generation numbers to track in-flight
+       updates and keeps an additional level of indirection to allow
+       readers to confine themselves to the desired snapshot of the
+       data structure.
+"
+}
+
+@inproceedings{Triplett:2011:RPHash
+,author = {Triplett, Josh and McKenney, Paul E. and Walpole, Jonathan}
+,title = {Resizable, Scalable, Concurrent Hash Tables via Relativistic Programming}
+,booktitle = {Proceedings of the 2011 USENIX Annual Technical Conference}
+,month = {June}
+,year = {2011}
+,pages = {145--158}
+,numpages = {14}
+,url={http://www.usenix.org/event/atc11/tech/final_files/atc11_proceedings.pdf}
+,publisher = {The USENIX Association}
+,address = {Portland, OR USA}
+}
+
+@unpublished{PaulEMcKenney2011RCU3.0trainwreck
+,Author="Paul E. McKenney"
+,Title="3.0 and {RCU:} what went wrong"
+,month="July"
+,day="27"
+,year="2011"
+,note="Available:
+\url{http://lwn.net/Articles/453002/}
+[Viewed July 27, 2011]"
+,annotation="
+       Analysis of the RCU trainwreck in Linux kernel 3.0.
+"
+}
+
+@unpublished{NeilBrown2011MeetTheLockers
+,Author="Neil Brown"
+,Title="Meet the Lockers"
+,month="August"
+,day="3"
+,year="2011"
+,note="Available:
+\url{http://lwn.net/Articles/453685/}
+[Viewed September 2, 2011]"
+,annotation="
+       The Locker family as an analogy for locking, reference counting,
+       RCU, and seqlock.
+"
+}
+
+@article{MathieuDesnoyers2012URCU
+,Author="Mathieu Desnoyers and Paul E. McKenney and Alan Stern and Michel R. Dagenais and Jonathan Walpole"
+,Title="User-Level Implementations of Read-Copy Update"
+,journal="IEEE Transactions on Parallel and Distributed Systems"
+,volume={23}
+,year="2012"
+,issn="1045-9219"
+,pages="375-382"
+,doi="http://doi.ieeecomputersociety.org/10.1109/TPDS.2011.159"
+,publisher="IEEE Computer Society"
+,address="Los Alamitos, CA, USA"
+,annotation={
+       RCU overview, desiderata, semi-formal semantics, user-level RCU
+       usage scenarios, three classes of RCU implementation, wait-free
+       RCU updates, RCU grace-period batching, update overhead,
+       http://www.rdrop.com/users/paulmck/RCU/urcu-main-accepted.2011.08.30a.pdf
+       http://www.rdrop.com/users/paulmck/RCU/urcu-supp-accepted.2011.08.30a.pdf
+}
 }
index bff2d8b..5c8d749 100644 (file)
@@ -180,6 +180,20 @@ over a rather long period of time, but improvements are always welcome!
        operations that would not normally be undertaken while a real-time
        workload is running.
 
+       In particular, if you find yourself invoking one of the expedited
+       primitives repeatedly in a loop, please do everyone a favor:
+       Restructure your code so that it batches the updates, allowing
+       a single non-expedited primitive to cover the entire batch.
+       This will very likely be faster than the loop containing the
+       expedited primitive, and will be much much easier on the rest
+       of the system, especially for real-time workloads running on
+       the rest of the system.
+
+       In addition, it is illegal to call the expedited forms from
+       a CPU-hotplug notifier, or while holding a lock that is acquired
+       by a CPU-hotplug notifier.  Failing to observe this restriction
+       will result in deadlock.
+
 7.     If the updater uses call_rcu() or synchronize_rcu(), then the
        corresponding readers must use rcu_read_lock() and
        rcu_read_unlock().  If the updater uses call_rcu_bh() or
index 083d88c..523364e 100644 (file)
@@ -12,14 +12,38 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
        This kernel configuration parameter defines the period of time
        that RCU will wait from the beginning of a grace period until it
        issues an RCU CPU stall warning.  This time period is normally
-       ten seconds.
+       sixty seconds.
 
-RCU_SECONDS_TILL_STALL_RECHECK
+       This configuration parameter may be changed at runtime via
+       /sys/module/rcutree/parameters/rcu_cpu_stall_timeout; however,
+       this parameter is checked only at the beginning of a cycle.
+       So if you are 30 seconds into a 70-second stall, setting this
+       sysfs parameter to (say) five will shorten the timeout for the
+       -next- stall, or the following warning for the current stall
+       (assuming the stall lasts long enough).  It will not affect the
+       timing of the next warning for the current stall.
 
-       This macro defines the period of time that RCU will wait after
-       issuing a stall warning until it issues another stall warning
-       for the same stall.  This time period is normally set to three
-       times the check interval plus thirty seconds.
+       Stall-warning messages may be enabled and disabled completely via
+       /sys/module/rcutree/parameters/rcu_cpu_stall_suppress.
+
+CONFIG_RCU_CPU_STALL_VERBOSE
+
+       This kernel configuration parameter causes the stall warning to
+       also dump the stacks of any tasks that are blocking the current
+       RCU-preempt grace period.
+
+CONFIG_RCU_CPU_STALL_INFO
+
+       This kernel configuration parameter causes the stall warning to
+       print out additional per-CPU diagnostic information, including
+       information on scheduling-clock ticks and RCU's idle-CPU tracking.
+
+RCU_STALL_DELAY_DELTA
+
+       Although the lockdep facility is extremely useful, it does add
+       some overhead.  Therefore, under CONFIG_PROVE_RCU, the
+       RCU_STALL_DELAY_DELTA macro allows five extra seconds before
+       giving an RCU CPU stall warning message.
 
 RCU_STALL_RAT_DELAY
 
@@ -64,6 +88,54 @@ INFO: rcu_bh_state detected stalls on CPUs/tasks: { } (detected by 4, 2502 jiffi
 
 This is rare, but does happen from time to time in real life.
 
+If the CONFIG_RCU_CPU_STALL_INFO kernel configuration parameter is set,
+more information is printed with the stall-warning message, for example:
+
+       INFO: rcu_preempt detected stall on CPU
+       0: (63959 ticks this GP) idle=241/3fffffffffffffff/0
+          (t=65000 jiffies)
+
+In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is
+printed:
+
+       INFO: rcu_preempt detected stall on CPU
+       0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer=-1
+          (t=65000 jiffies)
+
+The "(64628 ticks this GP)" indicates that this CPU has taken more
+than 64,000 scheduling-clock interrupts during the current stalled
+grace period.  If the CPU was not yet aware of the current grace
+period (for example, if it was offline), then this part of the message
+indicates how many grace periods behind the CPU is.
+
+The "idle=" portion of the message prints the dyntick-idle state.
+The hex number before the first "/" is the low-order 12 bits of the
+dynticks counter, which will have an even-numbered value if the CPU is
+in dyntick-idle mode and an odd-numbered value otherwise.  The hex
+number between the two "/"s is the value of the nesting, which will
+be a small positive number if in the idle loop and a very large positive
+number (as shown above) otherwise.
+
+For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the
+CPU is not in the process of trying to force itself into dyntick-idle
+state, the "." indicates that the CPU has not given up forcing RCU
+into dyntick-idle mode (it would be "H" otherwise), and the "timer=-1"
+indicates that the CPU has not recently forced RCU into dyntick-idle
+mode (it would otherwise indicate the number of microseconds remaining
+in this forced state).
+
+
+Multiple Warnings From One Stall
+
+If a stall lasts long enough, multiple stall-warning messages will be
+printed for it.  The second and subsequent messages are printed at
+longer intervals, so that the time between (say) the first and second
+message will be about three times the interval between the beginning
+of the stall and the first message.
+
+
+What Causes RCU CPU Stall Warnings?
+
 So your kernel printed an RCU CPU stall warning.  The next question is
 "What caused it?"  The following problems can result in RCU CPU stall
 warnings:
@@ -128,4 +200,5 @@ is occurring, which will usually be in the function nearest the top of
 that portion of the stack which remains the same from trace to trace.
 If you can reliably trigger the stall, ftrace can be quite helpful.
 
-RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE.
+RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE
+and with RCU's event tracing.
index d67068d..375d3fb 100644 (file)
@@ -69,6 +69,13 @@ onoff_interval
                CPU-hotplug operations regardless of what value is
                specified for onoff_interval.
 
+onoff_holdoff  The number of seconds to wait until starting CPU-hotplug
+               operations.  This would normally only be used when
+               rcutorture was built into the kernel and started
+               automatically at boot time, in which case it is useful
+               in order to avoid confusing boot-time code with CPUs
+               coming and going.
+
 shuffle_interval
                The number of seconds to keep the test threads affinitied
                to a particular subset of the CPUs, defaults to 3 seconds.
@@ -79,6 +86,24 @@ shutdown_secs        The number of seconds to run the test before terminating
                zero, which disables test termination and system shutdown.
                This capability is useful for automated testing.
 
+stall_cpu      The number of seconds that a CPU should be stalled while
+               within both an rcu_read_lock() and a preempt_disable().
+               This stall happens only once per rcutorture run.
+               If you need multiple stalls, use modprobe and rmmod to
+               repeatedly run rcutorture.  The default for stall_cpu
+               is zero, which prevents rcutorture from stalling a CPU.
+
+               Note that attempts to rmmod rcutorture while the stall
+               is ongoing will hang, so be careful what value you
+               choose for this module parameter!  In addition, too-large
+               values for stall_cpu might well induce failures and
+               warnings in other parts of the kernel.  You have been
+               warned!
+
+stall_cpu_holdoff
+               The number of seconds to wait after rcutorture starts
+               before stalling a CPU.  Defaults to 10 seconds.
+
 stat_interval  The number of seconds between output of torture
                statistics (via printk()).  Regardless of the interval,
                statistics are printed when the module is unloaded.
@@ -271,11 +296,13 @@ The following script may be used to torture RCU:
        #!/bin/sh
 
        modprobe rcutorture
-       sleep 100
+       sleep 3600
        rmmod rcutorture
        dmesg | grep torture:
 
 The output can be manually inspected for the error flag of "!!!".
 One could of course create a more elaborate script that automatically
-checked for such errors.  The "rmmod" command forces a "SUCCESS" or
-"FAILURE" indication to be printk()ed.
+checked for such errors.  The "rmmod" command forces a "SUCCESS",
+"FAILURE", or "RCU_HOTPLUG" indication to be printk()ed.  The first
+two are self-explanatory, while the last indicates that while there
+were no RCU failures, CPU-hotplug problems were detected.
index 49587ab..f6f15ce 100644 (file)
@@ -33,23 +33,23 @@ rcu/rcuboost:
 The output of "cat rcu/rcudata" looks as follows:
 
 rcu_sched:
-  0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
-  1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
-  2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
-  3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
-  4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
-  5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
-  6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
-  7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
+  0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
+  1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
+  2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
+  3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
+  4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
+  5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
+  6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
+  7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
 rcu_bh:
-  0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
-  1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
-  2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
-  3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
-  4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
-  5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
-  6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
-  7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
+  0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
+  1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
+  2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
+  3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
+  4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
+  5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
+  6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
+  7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
 
 The first section lists the rcu_data structures for rcu_sched, the second
 for rcu_bh.  Note that CONFIG_TREE_PREEMPT_RCU kernels will have an
@@ -119,10 +119,6 @@ o  "of" is the number of times that some other CPU has forced a
        CPU is offline when it is really alive and kicking) is a fatal
        error, so it makes sense to err conservatively.
 
-o      "ri" is the number of times that RCU has seen fit to send a
-       reschedule IPI to this CPU in order to get it to report a
-       quiescent state.
-
 o      "ql" is the number of RCU callbacks currently residing on
        this CPU.  This is the total number of callbacks, regardless
        of what state they are in (new, waiting for grace period to
index 141087c..fd2bd56 100644 (file)
@@ -7,9 +7,9 @@ Each LED is represented as a sub-node of the gpio-leds device.  Each
 node's name represents the name of the corresponding LED.
 
 LED sub-node properties:
-- gpios :  Should specify the LED's GPIO, see "Specifying GPIO information
-  for devices" in Documentation/devicetree/booting-without-of.txt.  Active
-  low LEDs should be indicated using flags in the GPIO specifier.
+- gpios :  Should specify the LED's GPIO, see "gpios property" in
+  Documentation/devicetree/gpio.txt.  Active low LEDs should be
+  indicated using flags in the GPIO specifier.
 - label :  (optional) The label for this LED.  If omitted, the label is
   taken from the node name (excluding the unit address).
 - linux,default-trigger :  (optional) This parameter, if present, is a
index ecc6a6c..a20008a 100644 (file)
@@ -30,6 +30,7 @@ national      National Semiconductor
 nintendo       Nintendo
 nvidia NVIDIA
 nxp    NXP Semiconductors
+picochip       Picochip Ltd
 powervr        Imagination Technologies
 qcom   Qualcomm, Inc.
 ramtron        Ramtron International
index a22ecf4..52729a7 100644 (file)
@@ -7,21 +7,29 @@ Supported chips:
     Addresses scanned: I2C 0x18 - 0x1f
     Datasheets:
        http://www.analog.com/static/imported-files/data_sheets/ADT7408.pdf
-  * IDT TSE2002B3, TS3000B3
-    Prefix: 'tse2002b3', 'ts3000b3'
+  * Atmel AT30TS00
+    Prefix: 'at30ts00'
     Addresses scanned: I2C 0x18 - 0x1f
     Datasheets:
-       http://www.idt.com/products/getdoc.cfm?docid=18715691
-       http://www.idt.com/products/getdoc.cfm?docid=18715692
+       http://www.atmel.com/Images/doc8585.pdf
+  * IDT TSE2002B3, TSE2002GB2, TS3000B3, TS3000GB2
+    Prefix: 'tse2002', 'ts3000'
+    Addresses scanned: I2C 0x18 - 0x1f
+    Datasheets:
+       http://www.idt.com/sites/default/files/documents/IDT_TSE2002B3C_DST_20100512_120303152056.pdf
+       http://www.idt.com/sites/default/files/documents/IDT_TSE2002GB2A1_DST_20111107_120303145914.pdf
+       http://www.idt.com/sites/default/files/documents/IDT_TS3000B3A_DST_20101129_120303152013.pdf
+       http://www.idt.com/sites/default/files/documents/IDT_TS3000GB2A1_DST_20111104_120303151012.pdf
   * Maxim MAX6604
     Prefix: 'max6604'
     Addresses scanned: I2C 0x18 - 0x1f
     Datasheets:
        http://datasheets.maxim-ic.com/en/ds/MAX6604.pdf
-  * Microchip MCP9805, MCP98242, MCP98243, MCP9843
-    Prefixes: 'mcp9805', 'mcp98242', 'mcp98243', 'mcp9843'
+  * Microchip MCP9804, MCP9805, MCP98242, MCP98243, MCP9843
+    Prefixes: 'mcp9804', 'mcp9805', 'mcp98242', 'mcp98243', 'mcp9843'
     Addresses scanned: I2C 0x18 - 0x1f
     Datasheets:
+       http://ww1.microchip.com/downloads/en/DeviceDoc/22203C.pdf
        http://ww1.microchip.com/downloads/en/DeviceDoc/21977b.pdf
        http://ww1.microchip.com/downloads/en/DeviceDoc/21996a.pdf
        http://ww1.microchip.com/downloads/en/DeviceDoc/22153c.pdf
@@ -48,6 +56,12 @@ Supported chips:
     Datasheets:
        http://www.st.com/stonline/products/literature/ds/13447/stts424.pdf
        http://www.st.com/stonline/products/literature/ds/13448/stts424e02.pdf
+  * ST Microelectronics STTS2002, STTS3000
+    Prefix: 'stts2002', 'stts3000'
+    Addresses scanned: I2C 0x18 - 0x1f
+    Datasheets:
+       http://www.st.com/internet/com/TECHNICAL_RESOURCES/TECHNICAL_LITERATURE/DATASHEET/CD00225278.pdf
+       http://www.st.com/internet/com/TECHNICAL_RESOURCES/TECHNICAL_LITERATURE/DATA_BRIEF/CD00270920.pdf
   * JEDEC JC 42.4 compliant temperature sensor chips
     Prefix: 'jc42'
     Addresses scanned: I2C 0x18 - 0x1f
index 3f44dbd..ceaf6f6 100644 (file)
@@ -50,7 +50,7 @@ W83627DHG, W83627DHG-P, W83627UHG, W83667HG, W83667HG-B, W83667HG-I
 (NCT6775F), and NCT6776F super I/O chips. We will refer to them collectively
 as Winbond chips.
 
-The chips implement 2 to 4 temperature sensors (9 for NCT6775F and NCT6776F),
+The chips implement 3 to 4 temperature sensors (9 for NCT6775F and NCT6776F),
 2 to 5 fan rotation speed sensors, 8 to 10 analog voltage sensors, one VID
 (except for 627UHG), alarms with beep warnings (control unimplemented),
 and some automatic fan regulation strategies (plus manual fan control mode).
@@ -143,8 +143,13 @@ pwm[1-4]_min_output - minimum fan speed (range 1 - 255), when the temperature
 pwm[1-4]_stop_time  - how many milliseconds [ms] must elapse to switch
                       corresponding fan off. (when the temperature was below
                       defined range).
+pwm[1-4]_start_output-minimum fan speed (range 1 - 255) when spinning up
+pwm[1-4]_step_output- rate of fan speed change (1 - 255)
+pwm[1-4]_stop_output- minimum fan speed (range 1 - 255) when spinning down
+pwm[1-4]_max_output - maximum fan speed (range 1 - 255), when the temperature
+                      is above defined range.
 
-Note: last two functions are influenced by other control bits, not yet exported
+Note: last six functions are influenced by other control bits, not yet exported
       by the driver, so a change might not have any effect.
 
 Implementation Details
index 51f76a1..a4e8d90 100644 (file)
@@ -88,14 +88,12 @@ Module parameters
 delay
 -----
 
-Some Intersil/Zilker Labs DC-DC controllers require a minimum interval between
-I2C bus accesses. According to Intersil, the minimum interval is 2 ms, though
-1 ms appears to be sufficient and has not caused any problems in testing.
-The problem is known to affect ZL6100, ZL2105, and ZL2008. It is known not to
-affect ZL2004 and ZL6105. The driver automatically sets the interval to 1 ms
-except for ZL2004 and ZL6105. To enable manual override, the driver provides a
-writeable module parameter, 'delay', which can be used to set the interval to
-a value between 0 and 65,535 microseconds.
+Intersil/Zilker Labs DC-DC controllers require a minimum interval between I2C
+bus accesses. According to Intersil, the minimum interval is 2 ms, though 1 ms
+appears to be sufficient and has not caused any problems in testing. The problem
+is known to affect all currently supported chips. For manual override, the
+driver provides a writeable module parameter, 'delay', which can be used to set
+the interval to a value between 0 and 65,535 microseconds.
 
 
 Sysfs entries
index f274c28..2f95308 100644 (file)
@@ -13,7 +13,8 @@ Detection
 
 All ALPS touchpads should respond to the "E6 report" command sequence:
 E8-E6-E6-E6-E9. An ALPS touchpad should respond with either 00-00-0A or
-00-00-64.
+00-00-64 if no buttons are pressed. The bits 0-2 of the first byte will be 1s
+if some buttons are pressed.
 
 If the E6 report is successful, the touchpad model is identified using the "E7
 report" sequence: E8-E7-E7-E7-E9. The response is the model signature and is
index 033d4e6..d99fd9c 100644 (file)
@@ -2211,6 +2211,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
                        default: off.
 
+       printk.always_kmsg_dump=
+                       Trigger kmsg_dump for cases other than kernel oops or
+                       panics
+                       Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
+                       default: disabled
+
        printk.time=    Show timing data prefixed to each printk message line
                        Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
 
diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt
new file mode 100644 (file)
index 0000000..d2a3660
--- /dev/null
@@ -0,0 +1,63 @@
+===============================================================
+Softlockup detector and hardlockup detector (aka nmi_watchdog)
+===============================================================
+
+The Linux kernel can act as a watchdog to detect both soft and hard
+lockups.
+
+A 'softlockup' is defined as a bug that causes the kernel to loop in
+kernel mode for more than 20 seconds (see "Implementation" below for
+details), without giving other tasks a chance to run. The current
+stack trace is displayed upon detection and, by default, the system
+will stay locked up. Alternatively, the kernel can be configured to
+panic; a sysctl, "kernel.softlockup_panic", a kernel parameter,
+"softlockup_panic" (see "Documentation/kernel-parameters.txt" for
+details), and a compile option, "BOOTPARAM_SOFTLOCKUP_PANIC", are
+provided for this.
+
+A 'hardlockup' is defined as a bug that causes the CPU to loop in
+kernel mode for more than 10 seconds (see "Implementation" below for
+details), without letting other interrupts have a chance to run.
+Similarly to the softlockup case, the current stack trace is displayed
+upon detection and the system will stay locked up unless the default
+behavior is changed, which can be done through a compile time knob,
+"BOOTPARAM_HARDLOCKUP_PANIC", and a kernel parameter, "nmi_watchdog"
+(see "Documentation/kernel-parameters.txt" for details).
+
+The panic option can be used in combination with panic_timeout (this
+timeout is set through the confusingly named "kernel.panic" sysctl),
+to cause the system to reboot automatically after a specified amount
+of time.
+
+=== Implementation ===
+
+The soft and hard lockup detectors are built on top of the hrtimer and
+perf subsystems, respectively. A direct consequence of this is that,
+in principle, they should work in any architecture where these
+subsystems are present.
+
+A periodic hrtimer runs to generate interrupts and kick the watchdog
+task. An NMI perf event is generated every "watchdog_thresh"
+(compile-time initialized to 10 and configurable through sysctl of the
+same name) seconds to check for hardlockups. If any CPU in the system
+does not receive any hrtimer interrupt during that time the
+'hardlockup detector' (the handler for the NMI perf event) will
+generate a kernel warning or call panic, depending on the
+configuration.
+
+The watchdog task is a high priority kernel thread that updates a
+timestamp every time it is scheduled. If that timestamp is not updated
+for 2*watchdog_thresh seconds (the softlockup threshold) the
+'softlockup detector' (coded inside the hrtimer callback function)
+will dump useful debug information to the system log, after which it
+will call panic if it was instructed to do so or resume execution of
+other kernel code.
+
+The period of the hrtimer is 2*watchdog_thresh/5, which means it has
+two or three chances to generate an interrupt before the hardlockup
+detector kicks in.
+
+As explained above, a kernel knob is provided that allows
+administrators to configure the period of the hrtimer and the perf
+event. The right value for a particular environment is a trade-off
+between fast response to lockups and detection overhead.
diff --git a/Documentation/nmi_watchdog.txt b/Documentation/nmi_watchdog.txt
deleted file mode 100644 (file)
index bf9f80a..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-
-[NMI watchdog is available for x86 and x86-64 architectures]
-
-Is your system locking up unpredictably? No keyboard activity, just
-a frustrating complete hard lockup? Do you want to help us debugging
-such lockups? If all yes then this document is definitely for you.
-
-On many x86/x86-64 type hardware there is a feature that enables
-us to generate 'watchdog NMI interrupts'.  (NMI: Non Maskable Interrupt
-which get executed even if the system is otherwise locked up hard).
-This can be used to debug hard kernel lockups.  By executing periodic
-NMI interrupts, the kernel can monitor whether any CPU has locked up,
-and print out debugging messages if so.
-
-In order to use the NMI watchdog, you need to have APIC support in your
-kernel. For SMP kernels, APIC support gets compiled in automatically. For
-UP, enable either CONFIG_X86_UP_APIC (Processor type and features -> Local
-APIC support on uniprocessors) or CONFIG_X86_UP_IOAPIC (Processor type and
-features -> IO-APIC support on uniprocessors) in your kernel config.
-CONFIG_X86_UP_APIC is for uniprocessor machines without an IO-APIC.
-CONFIG_X86_UP_IOAPIC is for uniprocessor with an IO-APIC. [Note: certain
-kernel debugging options, such as Kernel Stack Meter or Kernel Tracer,
-may implicitly disable the NMI watchdog.]
-
-For x86-64, the needed APIC is always compiled in.
-
-Using local APIC (nmi_watchdog=2) needs the first performance register, so
-you can't use it for other purposes (such as high precision performance
-profiling.) However, at least oprofile and the perfctr driver disable the
-local APIC NMI watchdog automatically.
-
-To actually enable the NMI watchdog, use the 'nmi_watchdog=N' boot
-parameter.  Eg. the relevant lilo.conf entry:
-
-        append="nmi_watchdog=1"
-
-For SMP machines and UP machines with an IO-APIC use nmi_watchdog=1.
-For UP machines without an IO-APIC use nmi_watchdog=2, this only works
-for some processor types.  If in doubt, boot with nmi_watchdog=1 and
-check the NMI count in /proc/interrupts; if the count is zero then
-reboot with nmi_watchdog=2 and check the NMI count.  If it is still
-zero then log a problem, you probably have a processor that needs to be
-added to the nmi code.
-
-A 'lockup' is the following scenario: if any CPU in the system does not
-execute the period local timer interrupt for more than 5 seconds, then
-the NMI handler generates an oops and kills the process. This
-'controlled crash' (and the resulting kernel messages) can be used to
-debug the lockup. Thus whenever the lockup happens, wait 5 seconds and
-the oops will show up automatically. If the kernel produces no messages
-then the system has crashed so hard (eg. hardware-wise) that either it
-cannot even accept NMI interrupts, or the crash has made the kernel
-unable to print messages.
-
-Be aware that when using local APIC, the frequency of NMI interrupts
-it generates, depends on the system load. The local APIC NMI watchdog,
-lacking a better source, uses the "cycles unhalted" event. As you may
-guess it doesn't tick when the CPU is in the halted state (which happens
-when the system is idle), but if your system locks up on anything but the
-"hlt" processor instruction, the watchdog will trigger very soon as the
-"cycles unhalted" event will happen every clock tick. If it locks up on
-"hlt", then you are out of luck -- the event will not happen at all and the
-watchdog won't trigger. This is a shortcoming of the local APIC watchdog
--- unfortunately there is no "clock ticks" event that would work all the
-time. The I/O APIC watchdog is driven externally and has no such shortcoming.
-But its NMI frequency is much higher, resulting in a more significant hit
-to the overall system performance.
-
-On x86 nmi_watchdog is disabled by default so you have to enable it with
-a boot time parameter.
-
-It's possible to disable the NMI watchdog in run-time by writing "0" to
-/proc/sys/kernel/nmi_watchdog. Writing "1" to the same file will re-enable
-the NMI watchdog. Notice that you still need to use "nmi_watchdog=" parameter
-at boot time.
-
-NOTE: In kernels prior to 2.4.2-ac18 the NMI-oopser is enabled unconditionally
-on x86 SMP boxes.
-
-[ feel free to send bug reports, suggestions and patches to
-  Ingo Molnar <mingo@redhat.com> or the Linux SMP mailing
-  list at <linux-smp@vger.kernel.org> ]
-
index 1cd5d51..8259b34 100644 (file)
@@ -38,7 +38,8 @@ First field is a sched_yield() statistic:
      1) # of times sched_yield() was called
 
 Next three are schedule() statistics:
-     2) # of times we switched to the expired queue and reused it
+     2) This field is a legacy array expiration count field used in the O(1)
+       scheduler. We kept it for ABI compatibility, but it is always set to zero.
      3) # of times schedule() was called
      4) # of times schedule() left the processor idle
 
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
new file mode 100644 (file)
index 0000000..d93f3c0
--- /dev/null
@@ -0,0 +1,286 @@
+                       Static Keys
+                       -----------
+
+By: Jason Baron <jbaron@redhat.com>
+
+0) Abstract
+
+Static keys allow the inclusion of seldom used features in
+performance-sensitive fast-path kernel code, via a GCC feature and a code
+patching technique. A quick example:
+
+       struct static_key key = STATIC_KEY_INIT_FALSE;
+
+       ...
+
+        if (static_key_false(&key))
+                do unlikely code
+        else
+                do likely code
+
+       ...
+       static_key_slow_inc(&key);
+       ...
+       static_key_slow_inc(&key);
+       ...
+
+The static_key_false() branch will be generated into the code with as little
+impact to the likely code path as possible.
+
+
+1) Motivation
+
+
+Currently, tracepoints are implemented using a conditional branch. The
+conditional check requires checking a global variable for each tracepoint.
+Although the overhead of this check is small, it increases when the memory
+cache comes under pressure (memory cache lines for these global variables may
+be shared with other memory accesses). As we increase the number of tracepoints
+in the kernel this overhead may become more of an issue. In addition,
+tracepoints are often dormant (disabled) and provide no direct kernel
+functionality. Thus, it is highly desirable to reduce their impact as much as
+possible. Although tracepoints are the original motivation for this work, other
+kernel code paths should be able to make use of the static keys facility.
+
+
+2) Solution
+
+
+gcc (v4.5) adds a new 'asm goto' statement that allows branching to a label:
+
+http://gcc.gnu.org/ml/gcc-patches/2009-07/msg01556.html
+
+Using the 'asm goto', we can create branches that are either taken or not taken
+by default, without the need to check memory. Then, at run-time, we can patch
+the branch site to change the branch direction.
+
+For example, if we have a simple branch that is disabled by default:
+
+       if (static_key_false(&key))
+               printk("I am the true branch\n");
+
+Thus, by default the 'printk' will not be emitted. And the code generated will
+consist of a single atomic 'no-op' instruction (5 bytes on x86), in the
+straight-line code path. When the branch is 'flipped', we will patch the
+'no-op' in the straight-line codepath with a 'jump' instruction to the
+out-of-line true branch. Thus, changing branch direction is expensive but
+branch selection is basically 'free'. That is the basic tradeoff of this
+optimization.
+
+This low-level patching mechanism is called 'jump label patching', and it gives
+the basis for the static keys facility.
+
+3) Static key label API, usage and examples:
+
+
+In order to make use of this optimization you must first define a key:
+
+       struct static_key key;
+
+Which is initialized as:
+
+       struct static_key key = STATIC_KEY_INIT_TRUE;
+
+or:
+
+       struct static_key key = STATIC_KEY_INIT_FALSE;
+
+If the key is not initialized, it defaults to false. The 'struct static_key'
+must be a 'global'. That is, it can't be allocated on the stack or dynamically
+allocated at run-time.
+
+The key is then used in code as:
+
+        if (static_key_false(&key))
+                do unlikely code
+        else
+                do likely code
+
+Or:
+
+        if (static_key_true(&key))
+                do likely code
+        else
+                do unlikely code
+
+A key that is initialized via 'STATIC_KEY_INIT_FALSE', must be used in a
+'static_key_false()' construct. Likewise, a key initialized via
+'STATIC_KEY_INIT_TRUE' must be used in a 'static_key_true()' construct. A
+single key can be used in many branches, but all the branches must match the
+way that the key has been initialized.
+
+The branch(es) can then be switched via:
+
+       static_key_slow_inc(&key);
+       ...
+       static_key_slow_dec(&key);
+
+Thus, 'static_key_slow_inc()' means 'make the branch true', and
+'static_key_slow_dec()' means 'make the branch false' with appropriate
+reference counting. For example, if the key is initialized true, a
+static_key_slow_dec(), will switch the branch to false. And a subsequent
+static_key_slow_inc(), will change the branch back to true. Likewise, if the
+key is initialized false, a 'static_key_slow_inc()', will change the branch to
+true. And then a 'static_key_slow_dec()', will again make the branch false.
+
+An example usage in the kernel is the implementation of tracepoints:
+
+        static inline void trace_##name(proto)                          \
+        {                                                               \
+                if (static_key_false(&__tracepoint_##name.key))                \
+                        __DO_TRACE(&__tracepoint_##name,                \
+                                TP_PROTO(data_proto),                   \
+                                TP_ARGS(data_args),                     \
+                                TP_CONDITION(cond));                    \
+        }
+
+Tracepoints are disabled by default, and can be placed in performance critical
+pieces of the kernel. Thus, by using a static key, the tracepoints can have
+absolutely minimal impact when not in use.
+
+
+4) Architecture level code patching interface, 'jump labels'
+
+
+There are a few functions and macros that architectures must implement in order
+to take advantage of this optimization. If there is no architecture support, we
+simply fall back to a traditional, load, test, and jump sequence.
+
+* select HAVE_ARCH_JUMP_LABEL, see: arch/x86/Kconfig
+
+* #define JUMP_LABEL_NOP_SIZE, see: arch/x86/include/asm/jump_label.h
+
+* __always_inline bool arch_static_branch(struct static_key *key), see:
+                                       arch/x86/include/asm/jump_label.h
+
+* void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type),
+                                       see: arch/x86/kernel/jump_label.c
+
+* __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type),
+                                       see: arch/x86/kernel/jump_label.c
+
+
+* struct jump_entry, see: arch/x86/include/asm/jump_label.h
+
+
+5) Static keys / jump label analysis, results (x86_64):
+
+
+As an example, let's add the following branch to 'getppid()', such that the
+system call now looks like:
+
+SYSCALL_DEFINE0(getppid)
+{
+        int pid;
+
++       if (static_key_false(&key))
++               printk("I am the true branch\n");
+
+        rcu_read_lock();
+        pid = task_tgid_vnr(rcu_dereference(current->real_parent));
+        rcu_read_unlock();
+
+        return pid;
+}
+
+The resulting instructions with jump labels generated by GCC are:
+
+ffffffff81044290 <sys_getppid>:
+ffffffff81044290:       55                      push   %rbp
+ffffffff81044291:       48 89 e5                mov    %rsp,%rbp
+ffffffff81044294:       e9 00 00 00 00          jmpq   ffffffff81044299 <sys_getppid+0x9>
+ffffffff81044299:       65 48 8b 04 25 c0 b6    mov    %gs:0xb6c0,%rax
+ffffffff810442a0:       00 00
+ffffffff810442a2:       48 8b 80 80 02 00 00    mov    0x280(%rax),%rax
+ffffffff810442a9:       48 8b 80 b0 02 00 00    mov    0x2b0(%rax),%rax
+ffffffff810442b0:       48 8b b8 e8 02 00 00    mov    0x2e8(%rax),%rdi
+ffffffff810442b7:       e8 f4 d9 00 00          callq  ffffffff81051cb0 <pid_vnr>
+ffffffff810442bc:       5d                      pop    %rbp
+ffffffff810442bd:       48 98                   cltq
+ffffffff810442bf:       c3                      retq
+ffffffff810442c0:       48 c7 c7 e3 54 98 81    mov    $0xffffffff819854e3,%rdi
+ffffffff810442c7:       31 c0                   xor    %eax,%eax
+ffffffff810442c9:       e8 71 13 6d 00          callq  ffffffff8171563f <printk>
+ffffffff810442ce:       eb c9                   jmp    ffffffff81044299 <sys_getppid+0x9>
+
+Without the jump label optimization it looks like:
+
+ffffffff810441f0 <sys_getppid>:
+ffffffff810441f0:       8b 05 8a 52 d8 00       mov    0xd8528a(%rip),%eax        # ffffffff81dc9480 <key>
+ffffffff810441f6:       55                      push   %rbp
+ffffffff810441f7:       48 89 e5                mov    %rsp,%rbp
+ffffffff810441fa:       85 c0                   test   %eax,%eax
+ffffffff810441fc:       75 27                   jne    ffffffff81044225 <sys_getppid+0x35>
+ffffffff810441fe:       65 48 8b 04 25 c0 b6    mov    %gs:0xb6c0,%rax
+ffffffff81044205:       00 00
+ffffffff81044207:       48 8b 80 80 02 00 00    mov    0x280(%rax),%rax
+ffffffff8104420e:       48 8b 80 b0 02 00 00    mov    0x2b0(%rax),%rax
+ffffffff81044215:       48 8b b8 e8 02 00 00    mov    0x2e8(%rax),%rdi
+ffffffff8104421c:       e8 2f da 00 00          callq  ffffffff81051c50 <pid_vnr>
+ffffffff81044221:       5d                      pop    %rbp
+ffffffff81044222:       48 98                   cltq
+ffffffff81044224:       c3                      retq
+ffffffff81044225:       48 c7 c7 13 53 98 81    mov    $0xffffffff81985313,%rdi
+ffffffff8104422c:       31 c0                   xor    %eax,%eax
+ffffffff8104422e:       e8 60 0f 6d 00          callq  ffffffff81715193 <printk>
+ffffffff81044233:       eb c9                   jmp    ffffffff810441fe <sys_getppid+0xe>
+ffffffff81044235:       66 66 2e 0f 1f 84 00    data32 nopw %cs:0x0(%rax,%rax,1)
+ffffffff8104423c:       00 00 00 00
+
+Thus, the disabled jump label case adds a 'mov', 'test' and 'jne' instruction
+vs. the jump label case just has a 'no-op' or 'jmp 0'. (The jmp 0, is patched
+to a 5 byte atomic no-op instruction at boot-time.) Thus, the disabled jump
+label case adds:
+
+6 (mov) + 2 (test) + 2 (jne) = 10 - 5 (5 byte jump 0) = 5 additional bytes.
+
+If we then include the padding bytes, the jump label code saves 16 total bytes
+of instruction memory for this small function. In this case the non-jump label
+function is 80 bytes long. Thus, we have saved 20% of the instruction
+footprint. We can in fact improve this even further, since the 5-byte no-op
+really can be a 2-byte no-op since we can reach the branch with a 2-byte jmp.
+However, we have not yet implemented optimal no-op sizes (they are currently
+hard-coded).
+
+Since there are a number of static key API uses in the scheduler paths,
+'pipe-test' (also known as 'perf bench sched pipe') can be used to show the
+performance improvement. Testing done on 3.3.0-rc2:
+
+jump label disabled:
+
+ Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs):
+
+        855.700314 task-clock                #    0.534 CPUs utilized            ( +-  0.11% )
+           200,003 context-switches          #    0.234 M/sec                    ( +-  0.00% )
+                 0 CPU-migrations            #    0.000 M/sec                    ( +- 39.58% )
+               487 page-faults               #    0.001 M/sec                    ( +-  0.02% )
+     1,474,374,262 cycles                    #    1.723 GHz                      ( +-  0.17% )
+   <not supported> stalled-cycles-frontend
+   <not supported> stalled-cycles-backend
+     1,178,049,567 instructions              #    0.80  insns per cycle          ( +-  0.06% )
+       208,368,926 branches                  #  243.507 M/sec                    ( +-  0.06% )
+         5,569,188 branch-misses             #    2.67% of all branches          ( +-  0.54% )
+
+       1.601607384 seconds time elapsed                                          ( +-  0.07% )
+
+jump label enabled:
+
+ Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs):
+
+        841.043185 task-clock                #    0.533 CPUs utilized            ( +-  0.12% )
+           200,004 context-switches          #    0.238 M/sec                    ( +-  0.00% )
+                 0 CPU-migrations            #    0.000 M/sec                    ( +- 40.87% )
+               487 page-faults               #    0.001 M/sec                    ( +-  0.05% )
+     1,432,559,428 cycles                    #    1.703 GHz                      ( +-  0.18% )
+   <not supported> stalled-cycles-frontend
+   <not supported> stalled-cycles-backend
+     1,175,363,994 instructions              #    0.82  insns per cycle          ( +-  0.04% )
+       206,859,359 branches                  #  245.956 M/sec                    ( +-  0.04% )
+         4,884,119 branch-misses             #    2.36% of all branches          ( +-  0.85% )
+
+       1.579384366 seconds time elapsed
+
+The percentage of saved branches is .7%, and we've saved 12% on
+'branch-misses'. This is where we would expect to get the most savings, since
+this optimization is about reducing the number of branches. In addition, we've
+saved .2% on instructions, and 2.8% on cycles and 1.4% on elapsed time.
index 1ebc24c..6f51fed 100644 (file)
@@ -226,6 +226,13 @@ Here is the list of current tracers that may be configured.
        Traces and records the max latency that it takes for
        the highest priority task to get scheduled after
        it has been woken up.
+        Traces all tasks as an average developer would expect.
+
+  "wakeup_rt"
+
+        Traces and records the max latency that it takes for just
+        RT tasks (as the current "wakeup" does). This is useful
+        for those interested in wake up timings of RT tasks.
 
   "hw-branch-tracer"
 
index 4e41d52..9c63a43 100644 (file)
@@ -962,7 +962,7 @@ F:  drivers/tty/serial/msm_serial.c
 F:     drivers/platform/msm/
 F:     drivers/*/pm8???-*
 F:     include/linux/mfd/pm8xxx/
-T:     git git://codeaurora.org/quic/kernel/davidb/linux-msm.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davidb/linux-msm.git
 S:     Maintained
 
 ARM/TOSA MACHINE SUPPORT
@@ -1310,7 +1310,7 @@ F:        drivers/atm/
 F:     include/linux/atm*
 
 ATMEL AT91 MCI DRIVER
-M:     Nicolas Ferre <nicolas.ferre@atmel.com>
+M:     Ludovic Desroches <ludovic.desroches@atmel.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.atmel.com/products/AT91/
 W:     http://www.at91.com/
@@ -1318,7 +1318,7 @@ S:        Maintained
 F:     drivers/mmc/host/at91_mci.c
 
 ATMEL AT91 / AT32 MCI DRIVER
-M:     Nicolas Ferre <nicolas.ferre@atmel.com>
+M:     Ludovic Desroches <ludovic.desroches@atmel.com>
 S:     Maintained
 F:     drivers/mmc/host/atmel-mci.c
 F:     drivers/mmc/host/atmel-mci-regs.h
@@ -1513,19 +1513,23 @@ F:      drivers/mtd/devices/block2mtd.c
 
 BLUETOOTH DRIVERS
 M:     Marcel Holtmann <marcel@holtmann.org>
-M:     "Gustavo F. Padovan" <padovan@profusion.mobi>
+M:     Gustavo Padovan <gustavo@padovan.org>
+M:     Johan Hedberg <johan.hedberg@gmail.com>
 L:     linux-bluetooth@vger.kernel.org
 W:     http://www.bluez.org/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/padovan/bluetooth-2.6.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/padovan/bluetooth.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jh/bluetooth.git
 S:     Maintained
 F:     drivers/bluetooth/
 
 BLUETOOTH SUBSYSTEM
 M:     Marcel Holtmann <marcel@holtmann.org>
-M:     "Gustavo F. Padovan" <padovan@profusion.mobi>
+M:     Gustavo Padovan <gustavo@padovan.org>
+M:     Johan Hedberg <johan.hedberg@gmail.com>
 L:     linux-bluetooth@vger.kernel.org
 W:     http://www.bluez.org/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/padovan/bluetooth-2.6.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/padovan/bluetooth.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jh/bluetooth.git
 S:     Maintained
 F:     net/bluetooth/
 F:     include/net/bluetooth/
@@ -1717,6 +1721,14 @@ F:       include/linux/can/error.h
 F:     include/linux/can/netlink.h
 F:     include/linux/can/platform/
 
+CAPABILITIES
+M:     Serge Hallyn <serge.hallyn@canonical.com>
+L:     linux-security-module@vger.kernel.org
+S:     Supported
+F:     include/linux/capability.h
+F:     security/capability.c
+F:     security/commoncap.c
+
 CELL BROADBAND ENGINE ARCHITECTURE
 M:     Arnd Bergmann <arnd@arndb.de>
 L:     linuxppc-dev@lists.ozlabs.org
@@ -2351,6 +2363,15 @@ S:       Supported
 F:     drivers/gpu/drm/exynos
 F:     include/drm/exynos*
 
+EXYNOS MIPI DISPLAY DRIVERS
+M:     Inki Dae <inki.dae@samsung.com>
+M:     Donghwa Lee <dh09.lee@samsung.com>
+M:     Kyungmin Park <kyungmin.park@samsung.com>
+L:     linux-fbdev@vger.kernel.org
+S:     Maintained
+F:     drivers/video/exynos/exynos_mipi*
+F:     include/video/exynos_mipi*
+
 DSCC4 DRIVER
 M:     Francois Romieu <romieu@fr.zoreil.com>
 L:     netdev@vger.kernel.org
@@ -2845,6 +2866,12 @@ S:       Maintained
 F:     drivers/media/video/m5mols/
 F:     include/media/m5mols.h
 
+FUJITSU TABLET EXTRAS
+M:     Robert Gerlach <khnz@gmx.de>
+L:     platform-driver-x86@vger.kernel.org
+S:     Maintained
+F:     drivers/platform/x86/fujitsu-tablet.c
+
 FUSE: FILESYSTEM IN USERSPACE
 M:     Miklos Szeredi <miklos@szeredi.hu>
 L:     fuse-devel@lists.sourceforge.net
@@ -5853,6 +5880,7 @@ F:        drivers/mmc/host/sdhci-s3c.c
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) ST SPEAR DRIVER
 M:     Viresh Kumar <viresh.kumar@st.com>
+L:     spear-devel@list.st.com
 L:     linux-mmc@vger.kernel.org
 S:     Maintained
 F:     drivers/mmc/host/sdhci-spear.c
@@ -6195,24 +6223,32 @@ F:      drivers/tty/serial/sunzilog.h
 
 SPEAR PLATFORM SUPPORT
 M:     Viresh Kumar <viresh.kumar@st.com>
+L:     spear-devel@list.st.com
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.st.com/spear
 S:     Maintained
 F:     arch/arm/plat-spear/
 
 SPEAR3XX MACHINE SUPPORT
 M:     Viresh Kumar <viresh.kumar@st.com>
+L:     spear-devel@list.st.com
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.st.com/spear
 S:     Maintained
 F:     arch/arm/mach-spear3xx/
 
 SPEAR6XX MACHINE SUPPORT
 M:     Rajeev Kumar <rajeev-dlh.kumar@st.com>
+L:     spear-devel@list.st.com
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.st.com/spear
 S:     Maintained
 F:     arch/arm/mach-spear6xx/
 
 SPEAR CLOCK FRAMEWORK SUPPORT
 M:     Viresh Kumar <viresh.kumar@st.com>
+L:     spear-devel@list.st.com
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.st.com/spear
 S:     Maintained
 F:     arch/arm/mach-spear*/clock.c
@@ -6221,6 +6257,8 @@ F:        arch/arm/plat-spear/include/plat/clock.h
 
 SPEAR PAD MULTIPLEXING SUPPORT
 M:     Viresh Kumar <viresh.kumar@st.com>
+L:     spear-devel@list.st.com
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.st.com/spear
 S:     Maintained
 F:     arch/arm/plat-spear/include/plat/padmux.h
index 66d13c9..1932984 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 3
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION =
 NAME = Saber-toothed Squirrel
 
 # *DOCUMENTATION*
index 4f55c73..5b448a7 100644 (file)
@@ -47,18 +47,29 @@ config KPROBES
          If in doubt, say "N".
 
 config JUMP_LABEL
-       bool "Optimize trace point call sites"
+       bool "Optimize very unlikely/likely branches"
        depends on HAVE_ARCH_JUMP_LABEL
        help
+         This option enables a transparent branch optimization that
+        makes certain almost-always-true or almost-always-false branch
+        conditions even cheaper to execute within the kernel.
+
+        Certain performance-sensitive kernel code, such as trace points,
+        scheduler functionality, networking code and KVM have such
+        branches and include support for this optimization technique.
+
          If it is detected that the compiler has support for "asm goto",
-        the kernel will compile trace point locations with just a
-        nop instruction. When trace points are enabled, the nop will
-        be converted to a jump to the trace function. This technique
-        lowers overhead and stress on the branch prediction of the
-        processor.
-
-        On i386, options added to the compiler flags may increase
-        the size of the kernel slightly.
+        the kernel will compile such branches with just a nop
+        instruction. When the condition flag is toggled to true, the
+        nop will be converted to a jump instruction to execute the
+        conditional block of instructions.
+
+        This technique lowers overhead and stress on the branch prediction
+        of the processor and generally makes the kernel faster. Updates
+        of the condition are slower, but those are always very rare.
+
+        ( On 32-bit x86, the necessary options added to the compiler
+          flags may increase the size of the kernel slightly. )
 
 config OPTPROBES
        def_bool y
index e8a761a..f939794 100644 (file)
@@ -108,7 +108,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
        "       lda     $31,3b-2b(%0)\n"
        "       .previous\n"
        :       "+r"(ret), "=&r"(prev), "=&r"(cmp)
-       :       "r"(uaddr), "r"((long)oldval), "r"(newval)
+       :       "r"(uaddr), "r"((long)(int)oldval), "r"(newval)
        :       "memory");
 
        *uval = prev;
index 8143cd7..0dae252 100644 (file)
@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event)
 {
        int err;
 
+       /* does not support taken branch sampling */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
        switch (event->attr.type) {
        case PERF_TYPE_RAW:
        case PERF_TYPE_HARDWARE:
index a48aecc..dfb0312 100644 (file)
@@ -1280,7 +1280,7 @@ config ARM_ERRATA_743622
        depends on CPU_V7
        help
          This option enables the workaround for the 743622 Cortex-A9
-         (r2p0..r2p2) erratum. Under very rare conditions, a faulty
+         (r2p*) erratum. Under very rare conditions, a faulty
          optimisation in the Cortex-A9 Store Buffer may lead to data
          corruption. This workaround sets a specific bit in the diagnostic
          register of the Cortex-A9 which disables the Store Buffer
index ce1c5ff..3c79f85 100644 (file)
@@ -3,3 +3,4 @@ zImage
 xipImage
 bootpImage
 uImage
+*.dtb
index 99cfe36..7523340 100644 (file)
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__
 
-/* ARM performance counters start from 1 (in the cp15 accesses) so use the
- * same indexes here for consistency. */
-#define PERF_EVENT_INDEX_OFFSET 1
-
 /* ARM perf PMU IDs for use by internal perf clients. */
 enum arm_perf_pmu_ids {
        ARM_PERF_PMU_ID_XSCALE1 = 0,
index b5a5be2..90114fa 100644 (file)
@@ -134,7 +134,7 @@ int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
 
 u64 armpmu_event_update(struct perf_event *event,
                        struct hw_perf_event *hwc,
-                       int idx, int overflow);
+                       int idx);
 
 int armpmu_event_set_period(struct perf_event *event,
                            struct hw_perf_event *hwc,
index 4dd0eda..1651d49 100644 (file)
@@ -242,6 +242,7 @@ static void ecard_init_pgtables(struct mm_struct *mm)
 
        memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (EASI_SIZE / PGDIR_SIZE));
 
+       vma.vm_flags = VM_EXEC;
        vma.vm_mm = mm;
 
        flush_tlb_range(&vma, IO_START, IO_START + IO_SIZE);
index 5bb91bf..8a89d3b 100644 (file)
@@ -180,7 +180,7 @@ armpmu_event_set_period(struct perf_event *event,
 u64
 armpmu_event_update(struct perf_event *event,
                    struct hw_perf_event *hwc,
-                   int idx, int overflow)
+                   int idx)
 {
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        u64 delta, prev_raw_count, new_raw_count;
@@ -193,13 +193,7 @@ again:
                             new_raw_count) != prev_raw_count)
                goto again;
 
-       new_raw_count &= armpmu->max_period;
-       prev_raw_count &= armpmu->max_period;
-
-       if (overflow)
-               delta = armpmu->max_period - prev_raw_count + new_raw_count + 1;
-       else
-               delta = new_raw_count - prev_raw_count;
+       delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
 
        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);
@@ -216,7 +210,7 @@ armpmu_read(struct perf_event *event)
        if (hwc->idx < 0)
                return;
 
-       armpmu_event_update(event, hwc, hwc->idx, 0);
+       armpmu_event_update(event, hwc, hwc->idx);
 }
 
 static void
@@ -232,7 +226,7 @@ armpmu_stop(struct perf_event *event, int flags)
        if (!(hwc->state & PERF_HES_STOPPED)) {
                armpmu->disable(hwc, hwc->idx);
                barrier(); /* why? */
-               armpmu_event_update(event, hwc, hwc->idx, 0);
+               armpmu_event_update(event, hwc, hwc->idx);
                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        }
 }
@@ -518,7 +512,13 @@ __hw_perf_event_init(struct perf_event *event)
        hwc->config_base            |= (unsigned long)mapping;
 
        if (!hwc->sample_period) {
-               hwc->sample_period  = armpmu->max_period;
+               /*
+                * For non-sampling runs, limit the sample_period to half
+                * of the counter width. That way, the new counter value
+                * is far less likely to overtake the previous one unless
+                * you have some serious IRQ latency issues.
+                */
+               hwc->sample_period  = armpmu->max_period >> 1;
                hwc->last_period    = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
        }
@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event)
        int err = 0;
        atomic_t *active_events = &armpmu->active_events;
 
+       /* does not support taken branch sampling */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
        if (armpmu->map_event(event) == -ENOENT)
                return -ENOENT;
 
@@ -680,6 +684,28 @@ static void __init cpu_pmu_init(struct arm_pmu *armpmu)
 }
 
 /*
+ * PMU hardware loses all context when a CPU goes offline.
+ * When a CPU is hotplugged back in, since some hardware registers are
+ * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
+ * junk values out of them.
+ */
+static int __cpuinit pmu_cpu_notify(struct notifier_block *b,
+                                       unsigned long action, void *hcpu)
+{
+       if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
+               return NOTIFY_DONE;
+
+       if (cpu_pmu && cpu_pmu->reset)
+               cpu_pmu->reset(NULL);
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata pmu_cpu_notifier = {
+       .notifier_call = pmu_cpu_notify,
+};
+
+/*
  * CPU PMU identification and registration.
  */
 static int __init
@@ -730,6 +756,7 @@ init_hw_perf_events(void)
                pr_info("enabled with %s PMU driver, %d counters available\n",
                        cpu_pmu->name, cpu_pmu->num_events);
                cpu_pmu_init(cpu_pmu);
+               register_cpu_notifier(&pmu_cpu_notifier);
                armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
        } else {
                pr_info("no hardware support available\n");
index 533be99..b78af0c 100644 (file)
@@ -467,23 +467,6 @@ armv6pmu_enable_event(struct hw_perf_event *hwc,
        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static int counter_is_active(unsigned long pmcr, int idx)
-{
-       unsigned long mask = 0;
-       if (idx == ARMV6_CYCLE_COUNTER)
-               mask = ARMV6_PMCR_CCOUNT_IEN;
-       else if (idx == ARMV6_COUNTER0)
-               mask = ARMV6_PMCR_COUNT0_IEN;
-       else if (idx == ARMV6_COUNTER1)
-               mask = ARMV6_PMCR_COUNT1_IEN;
-
-       if (mask)
-               return pmcr & mask;
-
-       WARN_ONCE(1, "invalid counter number (%d)\n", idx);
-       return 0;
-}
-
 static irqreturn_t
 armv6pmu_handle_irq(int irq_num,
                    void *dev)
@@ -513,7 +496,8 @@ armv6pmu_handle_irq(int irq_num,
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;
 
-               if (!counter_is_active(pmcr, idx))
+               /* Ignore if we don't have an event. */
+               if (!event)
                        continue;
 
                /*
@@ -524,7 +508,7 @@ armv6pmu_handle_irq(int irq_num,
                        continue;
 
                hwc = &event->hw;
-               armpmu_event_update(event, hwc, idx, 1);
+               armpmu_event_update(event, hwc, idx);
                data.period = event->hw.last_period;
                if (!armpmu_event_set_period(event, hwc, idx))
                        continue;
index 6933244..4d7095a 100644 (file)
@@ -809,6 +809,11 @@ static inline int armv7_pmnc_disable_intens(int idx)
 
        counter = ARMV7_IDX_TO_COUNTER(idx);
        asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
+       isb();
+       /* Clear the overflow flag in case an interrupt is pending. */
+       asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter)));
+       isb();
+
        return idx;
 }
 
@@ -955,6 +960,10 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;
 
+               /* Ignore if we don't have an event. */
+               if (!event)
+                       continue;
+
                /*
                 * We have a single interrupt for all counters. Check that
                 * each counter has overflowed before we process it.
@@ -963,7 +972,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
                        continue;
 
                hwc = &event->hw;
-               armpmu_event_update(event, hwc, idx, 1);
+               armpmu_event_update(event, hwc, idx);
                data.period = event->hw.last_period;
                if (!armpmu_event_set_period(event, hwc, idx))
                        continue;
index 3b99d82..71a21e6 100644 (file)
@@ -255,11 +255,14 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;
 
+               if (!event)
+                       continue;
+
                if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
                        continue;
 
                hwc = &event->hw;
-               armpmu_event_update(event, hwc, idx, 1);
+               armpmu_event_update(event, hwc, idx);
                data.period = event->hw.last_period;
                if (!armpmu_event_set_period(event, hwc, idx))
                        continue;
@@ -592,11 +595,14 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;
 
-               if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
+               if (!event)
+                       continue;
+
+               if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
                        continue;
 
                hwc = &event->hw;
-               armpmu_event_update(event, hwc, idx, 1);
+               armpmu_event_update(event, hwc, idx);
                data.period = event->hw.last_period;
                if (!armpmu_event_set_period(event, hwc, idx))
                        continue;
@@ -663,7 +669,7 @@ xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
 static void
 xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
-       unsigned long flags, ien, evtsel;
+       unsigned long flags, ien, evtsel, of_flags;
        struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
        ien = xscale2pmu_read_int_enable();
@@ -672,26 +678,31 @@ xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
        switch (idx) {
        case XSCALE_CYCLE_COUNTER:
                ien &= ~XSCALE2_CCOUNT_INT_EN;
+               of_flags = XSCALE2_CCOUNT_OVERFLOW;
                break;
        case XSCALE_COUNTER0:
                ien &= ~XSCALE2_COUNT0_INT_EN;
                evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
                evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
+               of_flags = XSCALE2_COUNT0_OVERFLOW;
                break;
        case XSCALE_COUNTER1:
                ien &= ~XSCALE2_COUNT1_INT_EN;
                evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
                evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
+               of_flags = XSCALE2_COUNT1_OVERFLOW;
                break;
        case XSCALE_COUNTER2:
                ien &= ~XSCALE2_COUNT2_INT_EN;
                evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
                evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
+               of_flags = XSCALE2_COUNT2_OVERFLOW;
                break;
        case XSCALE_COUNTER3:
                ien &= ~XSCALE2_COUNT3_INT_EN;
                evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
                evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
+               of_flags = XSCALE2_COUNT3_OVERFLOW;
                break;
        default:
                WARN_ONCE(1, "invalid counter number (%d)\n", idx);
@@ -701,6 +712,7 @@ xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
        raw_spin_lock_irqsave(&events->pmu_lock, flags);
        xscale2pmu_write_event_select(evtsel);
        xscale2pmu_write_int_enable(ien);
+       xscale2pmu_write_overflow_flags(of_flags);
        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
index 971d65c..c2ae3cd 100644 (file)
@@ -239,9 +239,7 @@ void cpu_idle(void)
                leds_event(led_idle_end);
                rcu_idle_exit();
                tick_nohz_idle_exit();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index cdeb727..d616ed5 100644 (file)
@@ -295,13 +295,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
         */
        percpu_timer_setup();
 
-       while (!cpu_active(cpu))
-               cpu_relax();
-
-       /*
-        * cpu_active bit is set, so it's safe to enalbe interrupts
-        * now.
-        */
        local_irq_enable();
        local_fiq_enable();
 
index b7582dd..96e2adc 100644 (file)
 #if defined(CONFIG_AT_HDMAC) || defined(CONFIG_AT_HDMAC_MODULE)
 static u64 hdmac_dmamask = DMA_BIT_MASK(32);
 
-static struct at_dma_platform_data atdma_pdata = {
-       .nr_channels    = 8,
-};
-
 static struct resource hdmac_resources[] = {
        [0] = {
                .start  = AT91SAM9G45_BASE_DMA,
@@ -56,12 +52,11 @@ static struct resource hdmac_resources[] = {
 };
 
 static struct platform_device at_hdmac_device = {
-       .name           = "at_hdmac",
+       .name           = "at91sam9g45_dma",
        .id             = -1,
        .dev            = {
                                .dma_mask               = &hdmac_dmamask,
                                .coherent_dma_mask      = DMA_BIT_MASK(32),
-                               .platform_data          = &atdma_pdata,
        },
        .resource       = hdmac_resources,
        .num_resources  = ARRAY_SIZE(hdmac_resources),
@@ -69,9 +64,15 @@ static struct platform_device at_hdmac_device = {
 
 void __init at91_add_device_hdmac(void)
 {
-       dma_cap_set(DMA_MEMCPY, atdma_pdata.cap_mask);
-       dma_cap_set(DMA_SLAVE, atdma_pdata.cap_mask);
-       platform_device_register(&at_hdmac_device);
+#if defined(CONFIG_OF)
+       struct device_node *of_node =
+               of_find_node_by_name(NULL, "dma-controller");
+
+       if (of_node)
+               of_node_put(of_node);
+       else
+#endif
+               platform_device_register(&at_hdmac_device);
 }
 #else
 void __init at91_add_device_hdmac(void) {}
index 61908dc..9be71c1 100644 (file)
 #if defined(CONFIG_AT_HDMAC) || defined(CONFIG_AT_HDMAC_MODULE)
 static u64 hdmac_dmamask = DMA_BIT_MASK(32);
 
-static struct at_dma_platform_data atdma_pdata = {
-       .nr_channels    = 2,
-};
-
 static struct resource hdmac_resources[] = {
        [0] = {
                .start  = AT91SAM9RL_BASE_DMA,
@@ -51,12 +47,11 @@ static struct resource hdmac_resources[] = {
 };
 
 static struct platform_device at_hdmac_device = {
-       .name           = "at_hdmac",
+       .name           = "at91sam9rl_dma",
        .id             = -1,
        .dev            = {
                                .dma_mask               = &hdmac_dmamask,
                                .coherent_dma_mask      = DMA_BIT_MASK(32),
-                               .platform_data          = &atdma_pdata,
        },
        .resource       = hdmac_resources,
        .num_resources  = ARRAY_SIZE(hdmac_resources),
@@ -64,7 +59,6 @@ static struct platform_device at_hdmac_device = {
 
 void __init at91_add_device_hdmac(void)
 {
-       dma_cap_set(DMA_MEMCPY, atdma_pdata.cap_mask);
        platform_device_register(&at_hdmac_device);
 }
 #else
index d5fb44f..d67d0b4 100644 (file)
@@ -34,6 +34,7 @@
 #include <mach/ep93xx_spi.h>
 #include <mach/gpio-ep93xx.h>
 
+#include <asm/hardware/vic.h>
 #include <asm/mach-types.h>
 #include <asm/mach/map.h>
 #include <asm/mach/arch.h>
@@ -361,6 +362,7 @@ MACHINE_START(VISION_EP9307, "Vision Engraving Systems EP9307")
        .atag_offset    = 0x100,
        .map_io         = vision_map_io,
        .init_irq       = ep93xx_init_irq,
+       .handle_irq     = vic_handle_irq,
        .timer          = &ep93xx_timer,
        .init_machine   = vision_init_machine,
        .restart        = ep93xx_restart,
index 0fc65ff..3893995 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/i2c.h>
 #include <linux/gpio_keys.h>
 #include <linux/gpio.h>
+#include <linux/interrupt.h>
 #include <linux/fb.h>
 #include <linux/mfd/max8998.h>
 #include <linux/regulator/machine.h>
@@ -595,6 +596,7 @@ static struct mxt_platform_data qt602240_platform_data = {
        .threshold      = 0x28,
        .voltage        = 2800000,              /* 2.8V */
        .orient         = MXT_DIAGONAL,
+       .irqflags       = IRQF_TRIGGER_FALLING,
 };
 
 static struct i2c_board_info i2c3_devs[] __initdata = {
index 6c58266..719ee42 100644 (file)
@@ -343,6 +343,7 @@ static void __init omap3_check_revision(const char **cpu_rev)
        case 0xb944:
                omap_revision = AM335X_REV_ES1_0;
                *cpu_rev = "1.0";
+               break;
        case 0xb8f2:
                switch (rev) {
                case 0:
index 2cc1aa0..415a6f1 100644 (file)
@@ -420,8 +420,7 @@ static void __exit omap2_mbox_exit(void)
        platform_driver_unregister(&omap2_mbox_driver);
 }
 
-/* must be ready before omap3isp is probed */
-subsys_initcall(omap2_mbox_init);
+module_init(omap2_mbox_init);
 module_exit(omap2_mbox_exit);
 
 MODULE_LICENSE("GPL v2");
index b882204..ac49384 100644 (file)
@@ -150,7 +150,8 @@ err_out:
                platform_device_put(omap_iommu_pdev[i]);
        return err;
 }
-module_init(omap_iommu_init);
+/* must be ready before omap3isp is probed */
+subsys_initcall(omap_iommu_init);
 
 static void __exit omap_iommu_exit(void)
 {
index ebc5950..70de277 100644 (file)
@@ -31,6 +31,7 @@
 
 #include "common.h"
 #include "omap4-sar-layout.h"
+#include <linux/export.h>
 
 #ifdef CONFIG_CACHE_L2X0
 static void __iomem *l2cache_base;
@@ -55,6 +56,7 @@ void omap_bus_sync(void)
                isb();
        }
 }
+EXPORT_SYMBOL(omap_bus_sync);
 
 /* Steal one page physical memory for barrier implementation */
 int __init omap_barrier_reserve_memblock(void)
index 10b20c6..4b57757 100644 (file)
@@ -270,7 +270,6 @@ static struct regulator_init_data omap4_vusb_idata = {
        .constraints = {
                .min_uV                 = 3300000,
                .max_uV                 = 3300000,
-               .apply_uV               = true,
                .valid_modes_mask       = REGULATOR_MODE_NORMAL
                                        | REGULATOR_MODE_STANDBY,
                .valid_ops_mask         = REGULATOR_CHANGE_MODE
index 0d729e6..42d5cca 100644 (file)
@@ -49,7 +49,6 @@ extern unsigned pxa3xx_get_clk_frequency_khz(int);
 #endif
 
 extern struct syscore_ops pxa_irq_syscore_ops;
-extern struct syscore_ops pxa_gpio_syscore_ops;
 extern struct syscore_ops pxa2xx_mfp_syscore_ops;
 extern struct syscore_ops pxa3xx_mfp_syscore_ops;
 
index f147755..29b62af 100644 (file)
@@ -226,6 +226,12 @@ static void __init pxa25x_mfp_init(void)
 {
        int i;
 
+       /* running before pxa_gpio_probe() */
+#ifdef CONFIG_CPU_PXA26x
+       pxa_last_gpio = 89;
+#else
+       pxa_last_gpio = 84;
+#endif
        for (i = 0; i <= pxa_last_gpio; i++)
                gpio_desc[i].valid = 1;
 
@@ -295,6 +301,7 @@ static void __init pxa27x_mfp_init(void)
 {
        int i, gpio;
 
+       pxa_last_gpio = 120;    /* running before pxa_gpio_probe() */
        for (i = 0; i <= pxa_last_gpio; i++) {
                /* skip GPIO2, 5, 6, 7, 8, they are not
                 * valid pins allow configuration
index 00d6eac..3352b37 100644 (file)
@@ -208,6 +208,7 @@ static struct clk_lookup pxa25x_clkregs[] = {
        INIT_CLKREG(&clk_pxa25x_gpio11, NULL, "GPIO11_CLK"),
        INIT_CLKREG(&clk_pxa25x_gpio12, NULL, "GPIO12_CLK"),
        INIT_CLKREG(&clk_pxa25x_mem, "pxa2xx-pcmcia", NULL),
+       INIT_CLKREG(&clk_dummy, "pxa-gpio", NULL),
 };
 
 static struct clk_lookup pxa25x_hwuart_clkreg =
@@ -367,7 +368,6 @@ static int __init pxa25x_init(void)
 
                register_syscore_ops(&pxa_irq_syscore_ops);
                register_syscore_ops(&pxa2xx_mfp_syscore_ops);
-               register_syscore_ops(&pxa_gpio_syscore_ops);
                register_syscore_ops(&pxa2xx_clock_syscore_ops);
 
                ret = platform_add_devices(pxa25x_devices,
index c1673b3..6bce78e 100644 (file)
@@ -229,6 +229,7 @@ static struct clk_lookup pxa27x_clkregs[] = {
        INIT_CLKREG(&clk_pxa27x_im, NULL, "IMCLK"),
        INIT_CLKREG(&clk_pxa27x_memc, NULL, "MEMCLK"),
        INIT_CLKREG(&clk_pxa27x_mem, "pxa2xx-pcmcia", NULL),
+       INIT_CLKREG(&clk_dummy, "pxa-gpio", NULL),
 };
 
 #ifdef CONFIG_PM
@@ -455,7 +456,6 @@ static int __init pxa27x_init(void)
 
                register_syscore_ops(&pxa_irq_syscore_ops);
                register_syscore_ops(&pxa2xx_mfp_syscore_ops);
-               register_syscore_ops(&pxa_gpio_syscore_ops);
                register_syscore_ops(&pxa2xx_clock_syscore_ops);
 
                ret = platform_add_devices(devices, ARRAY_SIZE(devices));
index 4f402af..3918a67 100644 (file)
@@ -462,7 +462,6 @@ static int __init pxa3xx_init(void)
 
                register_syscore_ops(&pxa_irq_syscore_ops);
                register_syscore_ops(&pxa3xx_mfp_syscore_ops);
-               register_syscore_ops(&pxa_gpio_syscore_ops);
                register_syscore_ops(&pxa3xx_clock_syscore_ops);
 
                ret = platform_add_devices(devices, ARRAY_SIZE(devices));
index d082a58..5ce434b 100644 (file)
@@ -283,7 +283,6 @@ static int __init pxa95x_init(void)
                        return ret;
 
                register_syscore_ops(&pxa_irq_syscore_ops);
-               register_syscore_ops(&pxa_gpio_syscore_ops);
                register_syscore_ops(&pxa3xx_clock_syscore_ops);
 
                ret = platform_add_devices(devices, ARRAY_SIZE(devices));
index db8a98a..0c1eb1d 100644 (file)
@@ -12,6 +12,6 @@
 #ifndef __ARCH_ARM_MACH_S3C2440_COMMON_H
 #define __ARCH_ARM_MACH_S3C2440_COMMON_H
 
-void s3c2440_restart(char mode, const char *cmd);
+void s3c244x_restart(char mode, const char *cmd);
 
 #endif /* __ARCH_ARM_MACH_S3C2440_COMMON_H */
index 2456955..19b577b 100644 (file)
@@ -487,5 +487,5 @@ MACHINE_START(ANUBIS, "Simtec-Anubis")
        .init_machine   = anubis_init,
        .init_irq       = s3c24xx_init_irq,
        .timer          = &s3c24xx_timer,
-       .restart        = s3c2440_restart,
+       .restart        = s3c244x_restart,
 MACHINE_END
index d6a9763..d7ae49c 100644 (file)
@@ -222,5 +222,5 @@ MACHINE_START(AT2440EVB, "AT2440EVB")
        .init_machine   = at2440evb_init,
        .init_irq       = s3c24xx_init_irq,
        .timer          = &s3c24xx_timer,
-       .restart        = s3c2440_restart,
+       .restart        = s3c244x_restart,
 MACHINE_END
index 5859e60..9a4a5bc 100644 (file)
@@ -601,5 +601,5 @@ MACHINE_START(NEO1973_GTA02, "GTA02")
        .init_irq       = s3c24xx_init_irq,
        .init_machine   = gta02_machine_init,
        .timer          = &s3c24xx_timer,
-       .restart        = s3c2440_restart,
+       .restart        = s3c244x_restart,
 MACHINE_END
index adbbb85..5d66fb2 100644 (file)
@@ -701,5 +701,5 @@ MACHINE_START(MINI2440, "MINI2440")
        .init_machine   = mini2440_init,
        .init_irq       = s3c24xx_init_irq,
        .timer          = &s3c24xx_timer,
-       .restart        = s3c2440_restart,
+       .restart        = s3c244x_restart,
 MACHINE_END
index 40eaf84..5198e3e 100644 (file)
@@ -158,5 +158,5 @@ MACHINE_START(NEXCODER_2440, "NexVision - Nexcoder 2440")
        .init_machine   = nexcoder_init,
        .init_irq       = s3c24xx_init_irq,
        .timer          = &s3c24xx_timer,
-       .restart        = s3c2440_restart,
+       .restart        = s3c244x_restart,
 MACHINE_END
index 4c480ef..c5daeb6 100644 (file)
@@ -436,5 +436,5 @@ MACHINE_START(OSIRIS, "Simtec-OSIRIS")
        .init_irq       = s3c24xx_init_irq,
        .init_machine   = osiris_init,
        .timer          = &s3c24xx_timer,
-       .restart        = s3c2440_restart,
+       .restart        = s3c244x_restart,
 MACHINE_END
index 80077f6..6f68abf 100644 (file)
@@ -822,5 +822,5 @@ MACHINE_START(RX1950, "HP iPAQ RX1950")
        .init_irq = s3c24xx_init_irq,
        .init_machine = rx1950_init_machine,
        .timer = &s3c24xx_timer,
-       .restart        = s3c2440_restart,
+       .restart        = s3c244x_restart,
 MACHINE_END
index 20103ba..56af354 100644 (file)
@@ -213,5 +213,5 @@ MACHINE_START(RX3715, "IPAQ-RX3715")
        .init_irq       = rx3715_init_irq,
        .init_machine   = rx3715_init_machine,
        .timer          = &s3c24xx_timer,
-       .restart        = s3c2440_restart,
+       .restart        = s3c244x_restart,
 MACHINE_END
index 1deb60d..83a1036 100644 (file)
@@ -183,5 +183,5 @@ MACHINE_START(S3C2440, "SMDK2440")
        .map_io         = smdk2440_map_io,
        .init_machine   = smdk2440_machine_init,
        .timer          = &s3c24xx_timer,
-       .restart        = s3c2440_restart,
+       .restart        = s3c244x_restart,
 MACHINE_END
index 517623a..2b3dddb 100644 (file)
@@ -35,7 +35,6 @@
 #include <plat/cpu.h>
 #include <plat/s3c244x.h>
 #include <plat/pm.h>
-#include <plat/watchdog-reset.h>
 
 #include <plat/gpio-core.h>
 #include <plat/gpio-cfg.h>
@@ -74,15 +73,3 @@ void __init s3c2440_map_io(void)
        s3c24xx_gpiocfg_default.set_pull = s3c24xx_gpio_setpull_1up;
        s3c24xx_gpiocfg_default.get_pull = s3c24xx_gpio_getpull_1up;
 }
-
-void s3c2440_restart(char mode, const char *cmd)
-{
-       if (mode == 's') {
-               soft_restart(0);
-       }
-
-       arch_wdt_reset();
-
-       /* we'll take a jump through zero as a poor second */
-       soft_restart(0);
-}
index 36bc60f..d15852f 100644 (file)
@@ -46,6 +46,7 @@
 #include <plat/pm.h>
 #include <plat/pll.h>
 #include <plat/nand-core.h>
+#include <plat/watchdog-reset.h>
 
 static struct map_desc s3c244x_iodesc[] __initdata = {
        IODESC_ENT(CLKPWR),
@@ -196,3 +197,14 @@ struct syscore_ops s3c244x_pm_syscore_ops = {
        .suspend        = s3c244x_suspend,
        .resume         = s3c244x_resume,
 };
+
+void s3c244x_restart(char mode, const char *cmd)
+{
+       if (mode == 's')
+               soft_restart(0);
+
+       arch_wdt_reset();
+
+       /* we'll take a jump through zero as a poor second */
+       soft_restart(0);
+}
index 068b754..8aea3a2 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mfd/tmio.h>
 #include <linux/sh_clk.h>
+#include <linux/videodev2.h>
 #include <video/sh_mobile_lcdc.h>
 #include <video/sh_mipi_dsi.h>
 #include <sound/sh_fsi.h>
index eeb4d96..b4718b0 100644 (file)
@@ -794,7 +794,7 @@ static struct fsi_ak4642_info fsi2_ak4643_info = {
 static struct platform_device fsi_ak4643_device = {
        .name   = "fsi-ak4642-audio",
        .dev    = {
-               .platform_data  = &fsi_info,
+               .platform_data  = &fsi2_ak4643_info,
        },
 };
 
index 4d22016..4bd1162 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
 #include <linux/smsc911x.h>
+#include <linux/videodev2.h>
 #include <mach/common.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -241,7 +242,7 @@ static struct sh_mobile_lcdc_info lcdc0_info = {
        .clock_source   = LCDC_CLK_BUS,
        .ch[0] = {
                .chan                   = LCDC_CHAN_MAINLCD,
-               .bpp                    = 16,
+               .fourcc = V4L2_PIX_FMT_RGB565,
                .interface_type         = RGB24,
                .clock_divider          = 5,
                .flags                  = 0,
index a281324..7b53cda 100644 (file)
@@ -1352,6 +1352,10 @@ static struct map_desc mackerel_io_desc[] __initdata = {
 static void __init mackerel_map_io(void)
 {
        iotable_init(mackerel_io_desc, ARRAY_SIZE(mackerel_io_desc));
+       /* DMA memory at 0xff200000 - 0xffdfffff. The default 2MB size isn't
+        * enough to allocate the frame buffer memory.
+        */
+       init_consistent_dma_size(12 << 20);
 
        /* setup early devices and console here as well */
        sh7372_add_early_devices();
index 52af004..c59e8b8 100644 (file)
@@ -5,7 +5,7 @@ config UX500_SOC_COMMON
        default y
        select ARM_GIC
        select HAS_MTU
-       select ARM_ERRATA_753970
+       select PL310_ERRATA_753970
        select ARM_ERRATA_754322
        select ARM_ERRATA_764369
 
index 9b3d0fb..88c3ba1 100644 (file)
@@ -7,7 +7,7 @@ config ARCH_VEXPRESS_CA9X4
        select ARM_GIC
        select ARM_ERRATA_720789
        select ARM_ERRATA_751472
-       select ARM_ERRATA_753970
+       select PL310_ERRATA_753970
        select HAVE_SMP
        select MIGHT_HAVE_CACHE_L2X0
 
index 0404ccb..f1c8486 100644 (file)
@@ -230,9 +230,7 @@ __v7_setup:
        mcreq   p15, 0, r10, c15, c0, 1         @ write diagnostic register
 #endif
 #ifdef CONFIG_ARM_ERRATA_743622
-       teq     r6, #0x20                       @ present in r2p0
-       teqne   r6, #0x21                       @ present in r2p1
-       teqne   r6, #0x22                       @ present in r2p2
+       teq     r5, #0x00200000                 @ only present in r2p*
        mrceq   p15, 0, r10, c15, c0, 1         @ read diagnostic register
        orreq   r10, r10, #1 << 6               @ set bit #6
        mcreq   p15, 0, r10, c15, c0, 1         @ write diagnostic register
index 2efd645..37bbbbb 100644 (file)
 #define OMAP_GPMC_NR_IRQS      8
 #define OMAP_GPMC_IRQ_END      (OMAP_GPMC_IRQ_BASE + OMAP_GPMC_NR_IRQS)
 
+/* PRCM IRQ handler */
+#ifdef CONFIG_ARCH_OMAP2PLUS
+#define OMAP_PRCM_IRQ_BASE     (OMAP_GPMC_IRQ_END)
+#define OMAP_PRCM_NR_IRQS      64
+#define OMAP_PRCM_IRQ_END      (OMAP_PRCM_IRQ_BASE + OMAP_PRCM_NR_IRQS)
+#else
+#define OMAP_PRCM_IRQ_END      OMAP_GPMC_IRQ_END
+#endif
 
-#define NR_IRQS                        OMAP_GPMC_IRQ_END
+#define NR_IRQS                        OMAP_PRCM_IRQ_END
 
 #define OMAP_IRQ_BIT(irq)      (1 << ((irq) % 32))
 
index 9fe3534..2bab4c9 100644 (file)
@@ -1249,7 +1249,7 @@ static void s3c2410_dma_resume(void)
        struct s3c2410_dma_chan *cp = s3c2410_chans + dma_channels - 1;
        int channel;
 
-       for (channel = dma_channels - 1; channel >= 0; cp++, channel--)
+       for (channel = dma_channels - 1; channel >= 0; cp--, channel--)
                s3c2410_dma_resume_chan(cp);
 }
 
index f10768e..d21d744 100644 (file)
@@ -1409,7 +1409,7 @@ void __init s5p_ehci_set_platdata(struct s5p_ehci_platdata *pd)
 
 #ifdef CONFIG_S3C_DEV_USB_HSOTG
 static struct resource s3c_usb_hsotg_resources[] = {
-       [0] = DEFINE_RES_MEM(S3C_PA_USB_HSOTG, SZ_16K),
+       [0] = DEFINE_RES_MEM(S3C_PA_USB_HSOTG, SZ_128K),
        [1] = DEFINE_RES_IRQ(IRQ_OTG),
 };
 
index 0c77e42..abb5bde 100644 (file)
@@ -145,11 +145,13 @@ static void clockevent_set_mode(enum clock_event_mode mode,
 static int clockevent_next_event(unsigned long cycles,
                                 struct clock_event_device *clk_event_dev)
 {
-       u16 val;
+       u16 val = readw(gpt_base + CR(CLKEVT));
+
+       if (val & CTRL_ENABLE)
+               writew(val & ~CTRL_ENABLE, gpt_base + CR(CLKEVT));
 
        writew(cycles, gpt_base + LOAD(CLKEVT));
 
-       val = readw(gpt_base + CR(CLKEVT));
        val |= CTRL_ENABLE | CTRL_INT_ENABLE;
        writew(val, gpt_base + CR(CLKEVT));
 
index ea33957..92c5af9 100644 (file)
@@ -40,9 +40,7 @@ void cpu_idle(void)
                        cpu_idle_sleep();
                rcu_idle_exit();
                tick_nohz_idle_exit();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 8dd0416..a80a643 100644 (file)
@@ -94,9 +94,7 @@ void cpu_idle(void)
                        idle();
                rcu_idle_exit();
                tick_nohz_idle_exit();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 8154c4e..77ecbde 100644 (file)
@@ -122,8 +122,8 @@ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
 extern unsigned long get_wchan(struct task_struct *p);
 
-#define KSTK_EIP(tsk)  (task_pt_regs(task)->pc)
-#define        KSTK_ESP(tsk)   (task_pt_regs(task)->sp)
+#define KSTK_EIP(task) (task_pt_regs(task)->pc)
+#define KSTK_ESP(task) (task_pt_regs(task)->sp)
 
 #define cpu_relax()            do { } while (0)
 
index 3e977cc..30b37e5 100644 (file)
@@ -717,33 +717,6 @@ ENTRY(sys_ftruncate64_c6x)
 #endif
 ENDPROC(sys_ftruncate64_c6x)
 
-#ifdef __ARCH_WANT_SYSCALL_OFF_T
-;; On Entry
-;;   A4 - fd
-;;   B4 - offset_lo (LE), offset_hi (BE)
-;;   A6 - offset_lo (BE), offset_hi (LE)
-;;   B6 - len
-;;   A8 - advice
-ENTRY(sys_fadvise64_c6x)
-#ifdef CONFIG_C6X_BIG_KERNEL
-       MVKL    .S1     sys_fadvise64,A0
-       MVKH    .S1     sys_fadvise64,A0
-       BNOP    .S2X    A0,2
-#else
-       B       .S2     sys_fadvise64
-       NOP     2
-#endif
-#ifdef CONFIG_CPU_BIG_ENDIAN
-       MV      .L2     B4,B5
- ||    MV      .D2X    A6,B4
-#else
-       MV      .D2X    A6,B5
-#endif
-       MV      .D1X    B6,A6
-       MV      .D2X    A8,B6
-#endif
-ENDPROC(sys_fadvise64_c6x)
-
 ;; On Entry
 ;;   A4 - fd
 ;;   B4 - offset_lo (LE), offset_hi (BE)
index aa585e4..d8f50ff 100644 (file)
@@ -115,9 +115,7 @@ void cpu_idle (void)
                                idle = default_idle;
                        idle();
                }
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index a69e015..c52ea55 100644 (file)
@@ -12,6 +12,4 @@
 #ifndef _ASM_PERF_EVENT_H
 #define _ASM_PERF_EVENT_H
 
-#define PERF_EVENT_INDEX_OFFSET        0
-
 #endif /* _ASM_PERF_EVENT_H */
index 3901df1..29cc497 100644 (file)
@@ -92,9 +92,7 @@ void cpu_idle(void)
                                idle();
                }
 
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 933bd38..1a173b3 100644 (file)
@@ -81,9 +81,7 @@ void cpu_idle(void)
        while (1) {
                while (!need_resched())
                        idle();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 6c2910f..8b8526b 100644 (file)
@@ -19,6 +19,4 @@
 #ifndef _ASM_PERF_EVENT_H
 #define _ASM_PERF_EVENT_H
 
-#define PERF_EVENT_INDEX_OFFSET        0
-
 #endif /* _ASM_PERF_EVENT_H */
index c871a2c..0123c63 100644 (file)
@@ -179,8 +179,6 @@ void __cpuinit start_secondary(void)
        printk(KERN_INFO "%s cpu %d\n", __func__, current_thread_info()->cpu);
 
        set_cpu_online(cpu, true);
-       while (!cpumask_test_cpu(cpu, cpu_active_mask))
-               cpu_relax();
        local_irq_enable();
 
        cpu_idle();
index 32551d3..b149b88 100644 (file)
@@ -281,9 +281,9 @@ paravirt_init_missing_ticks_accounting(int cpu)
                pv_time_ops.init_missing_ticks_accounting(cpu);
 }
 
-struct jump_label_key;
-extern struct jump_label_key paravirt_steal_enabled;
-extern struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
 
 static inline int
 paravirt_do_steal_accounting(unsigned long *new_itm)
index 1008682..1b22f6d 100644 (file)
@@ -634,8 +634,8 @@ struct pv_irq_ops pv_irq_ops = {
  * pv_time_ops
  * time operations
  */
-struct jump_label_key paravirt_steal_enabled;
-struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
 
 static int
 ia64_native_do_steal_accounting(unsigned long *new_itm)
index 6d33c5c..9dc52b6 100644 (file)
@@ -330,9 +330,7 @@ cpu_idle (void)
                        normal_xtp();
 #endif
                }
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
                check_pgt_cache();
                if (cpu_is_offline(cpu))
                        play_dead();
index 422bea9..3a4a32b 100644 (file)
@@ -90,9 +90,7 @@ void cpu_idle (void)
 
                        idle();
                }
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 099283e..fe4186b 100644 (file)
@@ -78,9 +78,7 @@ void cpu_idle(void)
        while (1) {
                while (!need_resched())
                        idle();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 5e1078c..f7fe6c3 100644 (file)
@@ -73,9 +73,7 @@ void cpu_idle(void)
        /* endless idle loop with no priority at all */
        while (1) {
                idle();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 7dcb5bf..9155f7d 100644 (file)
@@ -110,9 +110,7 @@ void cpu_idle(void)
                rcu_idle_exit();
                tick_nohz_idle_exit();
 
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
                check_pgt_cache();
        }
 }
index 7da4d00..a7193ae 100644 (file)
@@ -146,7 +146,7 @@ static int __init alchemy_time_init(unsigned int m2int)
        cd->shift = 32;
        cd->mult = div_sc(32768, NSEC_PER_SEC, cd->shift);
        cd->max_delta_ns = clockevent_delta2ns(0xffffffff, cd);
-       cd->min_delta_ns = clockevent_delta2ns(8, cd);  /* ~0.25ms */
+       cd->min_delta_ns = clockevent_delta2ns(9, cd);  /* ~0.28ms */
        clockevents_register_device(cd);
        setup_irq(m2int, &au1x_rtcmatch2_irqaction);
 
index 24f5469..e215070 100644 (file)
@@ -96,7 +96,7 @@ void __init ath79_register_wmac(u8 *cal_data)
 {
        if (soc_is_ar913x())
                ar913x_wmac_setup();
-       if (soc_is_ar933x())
+       else if (soc_is_ar933x())
                ar933x_wmac_setup();
        else
                BUG();
index 4479fd6..28c6b27 100644 (file)
@@ -8,7 +8,7 @@ CONFIG_HIGH_RES_TIMERS=y
 # CONFIG_SECCOMP is not set
 CONFIG_USE_OF=y
 CONFIG_EXPERIMENTAL=y
-CONFIG_CROSS_COMPILE="mips-linux-gnu-"
+CONFIG_CROSS_COMPILE=""
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -22,7 +22,7 @@ CONFIG_AUDIT=y
 CONFIG_CGROUPS=y
 CONFIG_NAMESPACES=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="usr/dev_file_list usr/rootfs.xlp"
+CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_BZIP2=y
 CONFIG_RD_LZMA=y
 CONFIG_INITRAMFS_COMPRESSION_LZMA=y
index 7c68666..d0b857d 100644 (file)
@@ -8,7 +8,7 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_KEXEC=y
 CONFIG_EXPERIMENTAL=y
-CONFIG_CROSS_COMPILE="mips-linux-gnu-"
+CONFIG_CROSS_COMPILE=""
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -22,7 +22,7 @@ CONFIG_AUDIT=y
 CONFIG_NAMESPACES=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="usr/dev_file_list usr/rootfs.xlr"
+CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_BZIP2=y
 CONFIG_RD_LZMA=y
 CONFIG_INITRAMFS_COMPRESSION_GZIP=y
index 3b0b6e8..7fda0ce 100644 (file)
@@ -6,7 +6,7 @@ CONFIG_HZ_1000=y
 CONFIG_PREEMPT=y
 # CONFIG_SECCOMP is not set
 CONFIG_EXPERIMENTAL=y
-CONFIG_CROSS_COMPILE="mips-linux-"
+CONFIG_CROSS_COMPILE=""
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=16
index 1881b31..4d6d77e 100644 (file)
@@ -20,7 +20,7 @@
 #define WORD_INSN ".word"
 #endif
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
        asm goto("1:\tnop\n\t"
                "nop\n\t"
index 556e1be..fb9975c 100644 (file)
@@ -11,6 +11,9 @@
 #include <asm/io.h>
 #include <asm/mach-au1x00/au1000.h>
 
+struct gpio;
+struct gpio_chip;
+
 /* with the current GPIC design, up to 128 GPIOs are possible.
  * The only implementation so far is in the Au1300, which has 75 externally
  * available GPIOs.
@@ -203,7 +206,22 @@ static inline int gpio_request(unsigned int gpio, const char *label)
        return 0;
 }
 
-static inline void gpio_free(unsigned int gpio)
+static inline int gpio_request_one(unsigned gpio,
+                                       unsigned long flags, const char *label)
+{
+       return 0;
+}
+
+static inline int gpio_request_array(struct gpio *array, size_t num)
+{
+       return 0;
+}
+
+static inline void gpio_free(unsigned gpio)
+{
+}
+
+static inline void gpio_free_array(struct gpio *array, size_t num)
 {
 }
 
index d417909..da9bd7d 100644 (file)
@@ -39,9 +39,6 @@
 #define HPAGE_MASK     (~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER     (HPAGE_SHIFT - PAGE_SHIFT)
 #else /* !CONFIG_HUGETLB_PAGE */
-# ifndef BUILD_BUG
-#  define BUILD_BUG() do { extern void __build_bug(void); __build_bug(); } while (0)
-# endif
 #define HPAGE_SHIFT    ({BUILD_BUG(); 0; })
 #define HPAGE_SIZE     ({BUILD_BUG(); 0; })
 #define HPAGE_MASK     ({BUILD_BUG(); 0; })
index e3b897a..811084f 100644 (file)
@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event)
 {
        int err = 0;
 
+       /* does not support taken branch sampling */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
        switch (event->attr.type) {
        case PERF_TYPE_RAW:
        case PERF_TYPE_HARDWARE:
index 7955409..61f1cb4 100644 (file)
@@ -80,9 +80,7 @@ void __noreturn cpu_idle(void)
 #endif
                rcu_idle_exit();
                tick_nohz_idle_exit();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 58fe71a..d5e950a 100644 (file)
@@ -8,7 +8,6 @@
  * SMP support for BMIPS
  */
 
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
index cc4a3f1..d79ae54 100644 (file)
@@ -1135,7 +1135,7 @@ asmlinkage void do_mt(struct pt_regs *regs)
                printk(KERN_DEBUG "YIELD Scheduler Exception\n");
                break;
        case 5:
-               printk(KERN_DEBUG "Gating Storage Schedulier Exception\n");
+               printk(KERN_DEBUG "Gating Storage Scheduler Exception\n");
                break;
        default:
                printk(KERN_DEBUG "*** UNKNOWN THREAD EXCEPTION %d ***\n",
index a81176f..924da5e 100644 (file)
@@ -69,7 +69,6 @@ SECTIONS
        RODATA
 
        /* writeable */
-       _sdata = .;                             /* Start of data section */
        .data : {       /* Data */
                . = . + DATAOFFSET;             /* for CONFIG_MAPPED_KERNEL */
 
index 937cf33..69ebd58 100644 (file)
@@ -42,6 +42,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ
        const int field = sizeof(unsigned long) * 2;
        siginfo_t info;
        int fault;
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
+                                                (write ? FAULT_FLAG_WRITE : 0);
 
 #if 0
        printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
@@ -91,6 +93,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ
        if (in_atomic() || !mm)
                goto bad_area_nosemaphore;
 
+retry:
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
        if (!vma)
@@ -144,7 +147,11 @@ good_area:
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
-       fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
+       fault = handle_mm_fault(mm, vma, address, flags);
+
+       if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+               return;
+
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
@@ -153,12 +160,27 @@ good_area:
                        goto do_sigbus;
                BUG();
        }
-       if (fault & VM_FAULT_MAJOR) {
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
-               tsk->maj_flt++;
-       } else {
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-               tsk->min_flt++;
+       if (flags & FAULT_FLAG_ALLOW_RETRY) {
+               if (fault & VM_FAULT_MAJOR) {
+                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+                                                 regs, address);
+                       tsk->maj_flt++;
+               } else {
+                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+                                                 regs, address);
+                       tsk->min_flt++;
+               }
+               if (fault & VM_FAULT_RETRY) {
+                       flags &= ~FAULT_FLAG_ALLOW_RETRY;
+
+                       /*
+                        * No need to up_read(&mm->mmap_sem) as we would
+                        * have already released it in __lock_page_or_retry
+                        * in mm/filemap.c.
+                        */
+
+                       goto retry;
+               }
        }
 
        up_read(&mm->mmap_sem);
index aec2b11..1552150 100644 (file)
@@ -279,7 +279,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 {
        /* Propagate hose info into the subordinate devices.  */
 
-       struct list_head *ln;
        struct pci_dev *dev = bus->self;
 
        if (pci_probe_only && dev &&
@@ -288,9 +287,7 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
                pcibios_fixup_device_resources(dev, bus);
        }
 
-       for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) {
-               dev = pci_dev_b(ln);
-
+       list_for_each_entry(dev, &bus->devices, bus_list) {
                if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
                        pcibios_fixup_device_resources(dev, bus);
        }
index 86b98e9..62ead66 100644 (file)
  */
 void __init titan_ht_pcibios_fixup_bus(struct pci_bus *bus)
 {
-       struct pci_bus *current_bus = bus;
-       struct pci_dev *devices;
-       struct list_head *devices_link;
-
-       list_for_each(devices_link, &(current_bus->devices)) {
-               devices = pci_dev_b(devices_link);
-               if (devices == NULL)
-                       continue;
-       }
-
        /*
         * PLX and SPKT related changes go here
         */
index 8e93b21..4642f56 100644 (file)
@@ -102,7 +102,7 @@ static int __init tx_7segled_init_sysfs(void)
                        break;
                }
                dev->id = i;
-               dev->dev = &tx_7segled_subsys;
+               dev->bus = &tx_7segled_subsys;
                error = device_register(dev);
                if (!error) {
                        device_create_file(dev, &dev_attr_ascii);
index 28eec31..cac401d 100644 (file)
@@ -123,9 +123,7 @@ void cpu_idle(void)
                        idle();
                }
 
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 62c60b8..d4b94b3 100644 (file)
@@ -71,9 +71,7 @@ void cpu_idle(void)
        while (1) {
                while (!need_resched())
                        barrier();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
                check_pgt_cache();
        }
 }
index 938986e..ae098c4 100644 (file)
@@ -17,7 +17,7 @@
 #define JUMP_ENTRY_TYPE                stringify_in_c(FTR_ENTRY_LONG)
 #define JUMP_LABEL_NOP_SIZE    4
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
        asm goto("1:\n\t"
                 "nop\n\t"
index 8f1df12..1a8093f 100644 (file)
@@ -61,8 +61,6 @@ struct pt_regs;
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 
-#define PERF_EVENT_INDEX_OFFSET        1
-
 /*
  * Only override the default definitions in include/linux/perf_event.h
  * if we have hardware PMU support.
index 0a48bf5..c97fc60 100644 (file)
@@ -101,11 +101,11 @@ void cpu_idle(void)
                ppc64_runlatch_on();
                rcu_idle_exit();
                tick_nohz_idle_exit();
-               preempt_enable_no_resched();
-               if (cpu_should_die())
+               if (cpu_should_die()) {
+                       sched_preempt_enable_no_resched();
                        cpu_die();
-               schedule();
-               preempt_disable();
+               }
+               schedule_preempt_disabled();
        }
 }
 
index 64483fd..c2e27ed 100644 (file)
@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event)
        if (!ppmu)
                return -ENOENT;
 
+       /* does not support taken branch sampling */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
                ev = event->attr.config;
@@ -1193,6 +1197,11 @@ static int power_pmu_event_init(struct perf_event *event)
        return err;
 }
 
+static int power_pmu_event_idx(struct perf_event *event)
+{
+       return event->hw.idx;
+}
+
 struct pmu power_pmu = {
        .pmu_enable     = power_pmu_enable,
        .pmu_disable    = power_pmu_disable,
@@ -1205,6 +1214,7 @@ struct pmu power_pmu = {
        .start_txn      = power_pmu_start_txn,
        .cancel_txn     = power_pmu_cancel_txn,
        .commit_txn     = power_pmu_commit_txn,
+       .event_idx      = power_pmu_event_idx,
 };
 
 /*
index 8fc6258..a5fbf4c 100644 (file)
@@ -584,9 +584,7 @@ static void iseries_shared_idle(void)
                if (hvlpevent_is_pending())
                        process_iSeries_events();
 
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
@@ -615,9 +613,7 @@ static void iseries_dedicated_idle(void)
                ppc64_runlatch_on();
                rcu_idle_exit();
                tick_nohz_idle_exit();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 95a6cf2..6c32190 100644 (file)
@@ -13,7 +13,7 @@
 #define ASM_ALIGN ".balign 4"
 #endif
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
        asm goto("0:    brcl 0,0\n"
                ".pushsection __jump_table, \"aw\"\n"
index a75f168..4eb444e 100644 (file)
@@ -6,4 +6,3 @@
 
 /* Empty, just to avoid compiling error */
 
-#define PERF_EVENT_INDEX_OFFSET 0
index b9a7fdd..e30b2df 100644 (file)
@@ -165,13 +165,6 @@ static inline int ext_hash(u16 code)
        return (code + (code >> 9)) & 0xff;
 }
 
-static void ext_int_hash_update(struct rcu_head *head)
-{
-       struct ext_int_info *p = container_of(head, struct ext_int_info, rcu);
-
-       kfree(p);
-}
-
 int register_external_interrupt(u16 code, ext_int_handler_t handler)
 {
        struct ext_int_info *p;
@@ -202,7 +195,7 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
        list_for_each_entry_rcu(p, &ext_int_hash[index], entry)
                if (p->code == code && p->handler == handler) {
                        list_del_rcu(&p->entry);
-                       call_rcu(&p->rcu, ext_int_hash_update);
+                       kfree_rcu(p, rcu);
                }
        spin_unlock_irqrestore(&ext_int_hash_lock, flags);
        return 0;
index e795933..7618085 100644 (file)
@@ -97,9 +97,7 @@ void cpu_idle(void)
                tick_nohz_idle_exit();
                if (test_thread_flag(TIF_MCCK_PENDING))
                        s390_handle_mcck();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 2398ce6..b0e28c4 100644 (file)
@@ -550,12 +550,6 @@ int __cpuinit start_secondary(void *cpuvoid)
        S390_lowcore.restart_psw.addr =
                PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
        __ctl_set_bit(0, 28); /* Enable lowcore protection */
-       /*
-        * Wait until the cpu which brought this one up marked it
-        * active before enabling interrupts.
-        */
-       while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
-               cpu_relax();
        local_irq_enable();
        /* cpu_idle will call schedule for us */
        cpu_idle();
index 25d0803..2707023 100644 (file)
@@ -53,9 +53,7 @@ void __noreturn cpu_idle(void)
                while (!need_resched())
                        barrier();
 
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 3ead9e6..4568066 100644 (file)
@@ -66,6 +66,7 @@ vector        =       0
        .long   exception_entry0 + vector * 6
 vector =       vector + 1
        .endr
+vector =       0
        .rept   256
        .long   exception_entry1 + vector * 6
 vector =       vector + 1
index 0fbff14..0bd21c8 100644 (file)
@@ -79,7 +79,7 @@ struct clk div4_clks[DIV4_NR] = {
 #define MSTPCR1                0xffc80034
 #define MSTPCR2                0xffc10028
 
-enum { MSTP004, MSTP000, MSTP114, MSTP113, MSTP112,
+enum { MSTP004, MSTP000, MSTP127, MSTP114, MSTP113, MSTP112,
        MSTP111, MSTP110, MSTP103, MSTP102, MSTP220,
        MSTP_NR };
 
@@ -89,6 +89,7 @@ static struct clk mstp_clks[MSTP_NR] = {
        [MSTP000] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 0, 0),
 
        /* MSTPCR1 */
+       [MSTP127] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR1, 27, 0),
        [MSTP114] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR1, 14, 0),
        [MSTP113] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR1, 13, 0),
        [MSTP112] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR1, 12, 0),
@@ -131,6 +132,7 @@ static struct clk_lookup lookups[] = {
        CLKDEV_CON_ID("usb_fck", &mstp_clks[MSTP103]),
        CLKDEV_DEV_ID("renesas_usbhs.0", &mstp_clks[MSTP102]),
        CLKDEV_CON_ID("mmc0", &mstp_clks[MSTP220]),
+       CLKDEV_CON_ID("rspi2", &mstp_clks[MSTP127]),
 };
 
 int __init arch_clk_init(void)
index e5b420c..2b31443 100644 (file)
@@ -156,7 +156,7 @@ static struct clk_lookup lookups[] = {
        CLKDEV_CON_ID("siof_fck", &mstp_clks[MSTP003]),
        CLKDEV_CON_ID("hspi_fck", &mstp_clks[MSTP002]),
        CLKDEV_CON_ID("hudi_fck", &mstp_clks[MSTP119]),
-       CLKDEV_CON_ID("ubc_fck", &mstp_clks[MSTP117]),
+       CLKDEV_CON_ID("ubc0", &mstp_clks[MSTP117]),
        CLKDEV_CON_ID("dmac_11_6_fck", &mstp_clks[MSTP105]),
        CLKDEV_CON_ID("dmac_5_0_fck", &mstp_clks[MSTP104]),
        CLKDEV_CON_ID("gdta_fck", &mstp_clks[MSTP100]),
index 406508d..7e48928 100644 (file)
@@ -114,9 +114,7 @@ void cpu_idle(void)
 
                rcu_idle_exit();
                tick_nohz_idle_exit();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 10b14e3..068b8a2 100644 (file)
@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event)
 {
        int err;
 
+       /* does not support taken branch sampling */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
        switch (event->attr.type) {
        case PERF_TYPE_RAW:
        case PERF_TYPE_HW_CACHE:
index ad1fb5d..eddcfb3 100644 (file)
@@ -31,7 +31,7 @@ UTS_MACHINE    := sparc
 
 #KBUILD_CFLAGS += -g -pipe -fcall-used-g5 -fcall-used-g7
 KBUILD_CFLAGS += -m32 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
-KBUILD_AFLAGS += -m32
+KBUILD_AFLAGS += -m32 -Wa,-Av8
 
 #LDFLAGS_vmlinux = -N -Ttext 0xf0004000
 #  Since 2.5.40, the first stage is left not btfix-ed.
index fc73a82..5080d16 100644 (file)
@@ -7,7 +7,7 @@
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
                asm goto("1:\n\t"
                         "nop\n\t"
index 614da62..8e16a4a 100644 (file)
@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event)
        if (atomic_read(&nmi_active) < 0)
                return -ENODEV;
 
+       /* does not support taken branch sampling */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
        switch (attr->type) {
        case PERF_TYPE_HARDWARE:
                if (attr->config >= sparc_pmu->max_events)
index f793742..935fdbc 100644 (file)
@@ -113,9 +113,7 @@ void cpu_idle(void)
                        while (!need_resched())
                                cpu_relax();
                }
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
                check_pgt_cache();
        }
 }
@@ -138,9 +136,7 @@ void cpu_idle(void)
                        while (!need_resched())
                                cpu_relax();
                }
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
                check_pgt_cache();
        }
 }
index 39d8b05..06b5b5f 100644 (file)
@@ -104,15 +104,13 @@ void cpu_idle(void)
                rcu_idle_exit();
                tick_nohz_idle_exit();
 
-               preempt_enable_no_resched();
-
 #ifdef CONFIG_HOTPLUG_CPU
-               if (cpu_is_offline(cpu))
+               if (cpu_is_offline(cpu)) {
+                       sched_preempt_enable_no_resched();
                        cpu_play_dead();
+               }
 #endif
-
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index dafdbba..b8d99ac 100644 (file)
-#
-# Automatically generated make config: don't edit
-# Linux/tilegx 2.6.39-rc5 Kernel Configuration
-# Wed May  4 11:08:04 2011
-#
-CONFIG_TILE=y
-CONFIG_MMU=y
-CONFIG_GENERIC_CSUM=y
-CONFIG_SEMAPHORE_SLEEPERS=y
-CONFIG_HAVE_ARCH_ALLOC_REMAP=y
-CONFIG_HAVE_SETUP_PER_CPU_AREA=y
-CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
-CONFIG_SYS_SUPPORTS_HUGETLBFS=y
-CONFIG_GENERIC_CLOCKEVENTS=y
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-CONFIG_DEFAULT_MIGRATION_COST=10000000
-CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
-CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
-CONFIG_ARCH_DMA_ADDR_T_64BIT=y
-CONFIG_LOCKDEP_SUPPORT=y
-CONFIG_STACKTRACE_SUPPORT=y
-CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
-CONFIG_TRACE_IRQFLAGS_SUPPORT=y
-CONFIG_STRICT_DEVMEM=y
-CONFIG_SMP=y
-# CONFIG_DEBUG_COPY_FROM_USER is not set
-CONFIG_HVC_TILE=y
 CONFIG_TILEGX=y
-CONFIG_64BIT=y
-CONFIG_ARCH_DEFCONFIG="arch/tile/configs/tilegx_defconfig"
-CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
-CONFIG_CONSTRUCTORS=y
-
-#
-# General setup
-#
 CONFIG_EXPERIMENTAL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-CONFIG_CROSS_COMPILE=""
-CONFIG_LOCALVERSION=""
 # CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
-CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
-CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
-# CONFIG_FHANDLE is not set
+CONFIG_FHANDLE=y
 CONFIG_TASKSTATS=y
 CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_AUDIT=y
-CONFIG_HAVE_GENERIC_HARDIRQS=y
-
-#
-# IRQ subsystem
-#
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_IRQ_SHOW=y
-CONFIG_GENERIC_PENDING_IRQ=y
-
-#
-# RCU Subsystem
-#
-CONFIG_TREE_RCU=y
-# CONFIG_PREEMPT_RCU is not set
-# CONFIG_RCU_TRACE is not set
-CONFIG_RCU_FANOUT=64
-# CONFIG_RCU_FANOUT_EXACT is not set
-# CONFIG_RCU_FAST_NO_HZ is not set
-# CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_IKCONFIG is not set
 CONFIG_LOG_BUF_SHIFT=19
 CONFIG_CGROUPS=y
 CONFIG_CGROUP_DEBUG=y
-CONFIG_CGROUP_NS=y
-# CONFIG_CGROUP_FREEZER is not set
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
-CONFIG_PROC_PID_CPUSET=y
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_RESOURCE_COUNTERS=y
 CONFIG_CGROUP_MEM_RES_CTLR=y
 CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y
-CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED=y
 CONFIG_CGROUP_SCHED=y
-CONFIG_FAIR_GROUP_SCHED=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_BLK_CGROUP=y
-# CONFIG_DEBUG_BLK_CGROUP is not set
 CONFIG_NAMESPACES=y
-CONFIG_UTS_NS=y
-CONFIG_IPC_NS=y
-CONFIG_USER_NS=y
-CONFIG_PID_NS=y
-CONFIG_NET_NS=y
-# CONFIG_SCHED_AUTOGROUP is not set
-CONFIG_MM_OWNER=y
-# CONFIG_SYSFS_DEPRECATED is not set
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="usr/contents.txt"
-CONFIG_INITRAMFS_ROOT_UID=0
-CONFIG_INITRAMFS_ROOT_GID=0
-CONFIG_RD_GZIP=y
-# CONFIG_RD_BZIP2 is not set
-# CONFIG_RD_LZMA is not set
-# CONFIG_RD_XZ is not set
-# CONFIG_RD_LZO is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
-# CONFIG_INITRAMFS_COMPRESSION_GZIP is not set
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL=y
-CONFIG_ANON_INODES=y
-CONFIG_EXPERT=y
 CONFIG_SYSCTL_SYSCALL=y
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_ALL is not set
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
-CONFIG_SHMEM=y
-CONFIG_AIO=y
 CONFIG_EMBEDDED=y
-
-#
-# Kernel Performance Events And Counters
-#
-CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_PCI_QUIRKS=y
-CONFIG_SLUB_DEBUG=y
 # CONFIG_COMPAT_BRK is not set
-# CONFIG_SLAB is not set
-CONFIG_SLUB=y
-# CONFIG_SLOB is not set
 CONFIG_PROFILING=y
-CONFIG_USE_GENERIC_SMP_HELPERS=y
-
-#
-# GCOV-based kernel profiling
-#
-# CONFIG_GCOV_KERNEL is not set
-# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
-CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
-CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
-# CONFIG_MODULE_FORCE_UNLOAD is not set
-# CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_STOP_MACHINE=y
-CONFIG_BLOCK=y
-CONFIG_BLK_DEV_BSG=y
 CONFIG_BLK_DEV_INTEGRITY=y
-# CONFIG_BLK_DEV_THROTTLING is not set
-CONFIG_BLOCK_COMPAT=y
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_EFI_PARTITION=y
 CONFIG_CFQ_GROUP_IOSCHED=y
-# CONFIG_DEFAULT_DEADLINE is not set
-CONFIG_DEFAULT_CFQ=y
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="cfq"
-CONFIG_PADATA=y
-# CONFIG_INLINE_SPIN_TRYLOCK is not set
-# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set
-# CONFIG_INLINE_SPIN_LOCK is not set
-# CONFIG_INLINE_SPIN_LOCK_BH is not set
-# CONFIG_INLINE_SPIN_LOCK_IRQ is not set
-# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set
-CONFIG_INLINE_SPIN_UNLOCK=y
-# CONFIG_INLINE_SPIN_UNLOCK_BH is not set
-CONFIG_INLINE_SPIN_UNLOCK_IRQ=y
-# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set
-# CONFIG_INLINE_READ_TRYLOCK is not set
-# CONFIG_INLINE_READ_LOCK is not set
-# CONFIG_INLINE_READ_LOCK_BH is not set
-# CONFIG_INLINE_READ_LOCK_IRQ is not set
-# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set
-CONFIG_INLINE_READ_UNLOCK=y
-# CONFIG_INLINE_READ_UNLOCK_BH is not set
-CONFIG_INLINE_READ_UNLOCK_IRQ=y
-# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set
-# CONFIG_INLINE_WRITE_TRYLOCK is not set
-# CONFIG_INLINE_WRITE_LOCK is not set
-# CONFIG_INLINE_WRITE_LOCK_BH is not set
-# CONFIG_INLINE_WRITE_LOCK_IRQ is not set
-# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set
-CONFIG_INLINE_WRITE_UNLOCK=y
-# CONFIG_INLINE_WRITE_UNLOCK_BH is not set
-CONFIG_INLINE_WRITE_UNLOCK_IRQ=y
-# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set
-CONFIG_MUTEX_SPIN_ON_OWNER=y
-
-#
-# Tilera-specific configuration
-#
 CONFIG_NR_CPUS=100
-CONFIG_TICK_ONESHOT=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
-CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
-# CONFIG_HZ_300 is not set
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
-CONFIG_SCHED_HRTICK=y
-# CONFIG_KEXEC is not set
-CONFIG_COMPAT=y
-CONFIG_SYSVIPC_COMPAT=y
-# CONFIG_HIGHMEM is not set
-CONFIG_NUMA=y
-CONFIG_NODES_SHIFT=2
-CONFIG_PAGE_OFFSET=0xC0000000
-CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_DISCONTIGMEM_MANUAL=y
-CONFIG_DISCONTIGMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-CONFIG_NEED_MULTIPLE_NODES=y
-CONFIG_PAGEFLAGS_EXTENDED=y
-CONFIG_SPLIT_PTLOCK_CPUS=4
-# CONFIG_COMPACTION is not set
-CONFIG_MIGRATION=y
-CONFIG_PHYS_ADDR_T_64BIT=y
-CONFIG_ZONE_DMA_FLAG=0
-CONFIG_VIRT_TO_BUS=y
-# CONFIG_KSM is not set
-CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
-# CONFIG_CMDLINE_BOOL is not set
-CONFIG_VMALLOC_RESERVE=0x1000000
-CONFIG_HARDWALL=y
-CONFIG_KERNEL_PL=1
-
-#
-# Bus options
-#
-CONFIG_PCI=y
-CONFIG_PCI_DOMAINS=y
-# CONFIG_NO_IOMEM is not set
-# CONFIG_NO_IOPORT is not set
-# CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_DEBUG=y
-# CONFIG_PCI_STUB is not set
-# CONFIG_PCI_IOV is not set
-# CONFIG_HOTPLUG_PCI is not set
-
-#
-# Executable file formats
-#
-CONFIG_KCORE_ELF=y
-CONFIG_BINFMT_ELF=y
-CONFIG_COMPAT_BINFMT_ELF=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-# CONFIG_HAVE_AOUT is not set
 CONFIG_BINFMT_MISC=y
 CONFIG_NET=y
-
-#
-# Networking options
-#
 CONFIG_PACKET=y
 CONFIG_UNIX=y
-CONFIG_XFRM=y
 CONFIG_XFRM_USER=y
 CONFIG_XFRM_SUB_POLICY=y
-CONFIG_XFRM_MIGRATE=y
 CONFIG_XFRM_STATISTICS=y
-CONFIG_XFRM_IPCOMP=m
 CONFIG_NET_KEY=m
 CONFIG_NET_KEY_MIGRATE=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
-# CONFIG_IP_FIB_TRIE_STATS is not set
 CONFIG_IP_MULTIPLE_TABLES=y
 CONFIG_IP_ROUTE_MULTIPATH=y
 CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_ROUTE_CLASSID=y
-# CONFIG_IP_PNP is not set
 CONFIG_NET_IPIP=m
-# CONFIG_NET_IPGRE_DEMUX is not set
 CONFIG_IP_MROUTE=y
-# CONFIG_IP_MROUTE_MULTIPLE_TABLES is not set
 CONFIG_IP_PIMSM_V1=y
 CONFIG_IP_PIMSM_V2=y
-# CONFIG_ARPD is not set
 CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_TUNNEL=m
-CONFIG_INET_TUNNEL=m
 CONFIG_INET_XFRM_MODE_TRANSPORT=m
 CONFIG_INET_XFRM_MODE_TUNNEL=m
 CONFIG_INET_XFRM_MODE_BEET=m
-CONFIG_INET_LRO=y
 CONFIG_INET_DIAG=m
-CONFIG_INET_TCP_DIAG=m
 CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_BIC=m
-CONFIG_TCP_CONG_CUBIC=y
-CONFIG_TCP_CONG_WESTWOOD=m
-CONFIG_TCP_CONG_HTCP=m
 CONFIG_TCP_CONG_HSTCP=m
 CONFIG_TCP_CONG_HYBLA=m
-CONFIG_TCP_CONG_VEGAS=m
 CONFIG_TCP_CONG_SCALABLE=m
 CONFIG_TCP_CONG_LP=m
 CONFIG_TCP_CONG_VENO=m
 CONFIG_TCP_CONG_YEAH=m
 CONFIG_TCP_CONG_ILLINOIS=m
-CONFIG_DEFAULT_CUBIC=y
-# CONFIG_DEFAULT_RENO is not set
-CONFIG_DEFAULT_TCP_CONG="cubic"
 CONFIG_TCP_MD5SIG=y
 CONFIG_IPV6=y
 CONFIG_IPV6_PRIVACY=y
@@ -342,108 +98,60 @@ CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
 CONFIG_IPV6_MIP6=m
-CONFIG_INET6_XFRM_TUNNEL=m
-CONFIG_INET6_TUNNEL=m
 CONFIG_INET6_XFRM_MODE_TRANSPORT=m
 CONFIG_INET6_XFRM_MODE_TUNNEL=m
 CONFIG_INET6_XFRM_MODE_BEET=m
 CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
 CONFIG_IPV6_SIT=m
-# CONFIG_IPV6_SIT_6RD is not set
-CONFIG_IPV6_NDISC_NODETYPE=y
 CONFIG_IPV6_TUNNEL=m
 CONFIG_IPV6_MULTIPLE_TABLES=y
-# CONFIG_IPV6_SUBTREES is not set
 CONFIG_IPV6_MROUTE=y
-# CONFIG_IPV6_MROUTE_MULTIPLE_TABLES is not set
 CONFIG_IPV6_PIMSM_V2=y
 CONFIG_NETLABEL=y
-CONFIG_NETWORK_SECMARK=y
-# CONFIG_NETWORK_PHY_TIMESTAMPING is not set
 CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_DEBUG is not set
-CONFIG_NETFILTER_ADVANCED=y
-CONFIG_BRIDGE_NETFILTER=y
-
-#
-# Core Netfilter Configuration
-#
-CONFIG_NETFILTER_NETLINK=m
-CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_NETLINK_LOG=m
-CONFIG_NF_CONNTRACK=y
-CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_SECMARK=y
 CONFIG_NF_CONNTRACK_ZONES=y
 CONFIG_NF_CONNTRACK_EVENTS=y
-# CONFIG_NF_CONNTRACK_TIMESTAMP is not set
 CONFIG_NF_CT_PROTO_DCCP=m
-CONFIG_NF_CT_PROTO_GRE=m
-CONFIG_NF_CT_PROTO_SCTP=m
 CONFIG_NF_CT_PROTO_UDPLITE=m
 CONFIG_NF_CONNTRACK_AMANDA=m
 CONFIG_NF_CONNTRACK_FTP=m
 CONFIG_NF_CONNTRACK_H323=m
 CONFIG_NF_CONNTRACK_IRC=m
-CONFIG_NF_CONNTRACK_BROADCAST=m
 CONFIG_NF_CONNTRACK_NETBIOS_NS=m
-# CONFIG_NF_CONNTRACK_SNMP is not set
 CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
-# CONFIG_NF_CT_NETLINK is not set
 CONFIG_NETFILTER_TPROXY=m
-CONFIG_NETFILTER_XTABLES=y
-
-#
-# Xtables combined modules
-#
-CONFIG_NETFILTER_XT_MARK=m
-CONFIG_NETFILTER_XT_CONNMARK=m
-
-#
-# Xtables targets
-#
-# CONFIG_NETFILTER_XT_TARGET_AUDIT is not set
-# CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
 CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
 CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
 CONFIG_NETFILTER_XT_TARGET_CT=m
 CONFIG_NETFILTER_XT_TARGET_DSCP=m
-CONFIG_NETFILTER_XT_TARGET_HL=m
 CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
 CONFIG_NETFILTER_XT_TARGET_MARK=m
 CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
-CONFIG_NETFILTER_XT_TARGET_RATEEST=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
 CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_SECMARK=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
-
-#
-# Xtables matches
-#
-# CONFIG_NETFILTER_XT_MATCH_ADDRTYPE is not set
 CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
 CONFIG_NETFILTER_XT_MATCH_COMMENT=m
 CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
 CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
 CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
-# CONFIG_NETFILTER_XT_MATCH_CPU is not set
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
 CONFIG_NETFILTER_XT_MATCH_DCCP=m
-# CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set
 CONFIG_NETFILTER_XT_MATCH_DSCP=m
 CONFIG_NETFILTER_XT_MATCH_ESP=m
 CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
 CONFIG_NETFILTER_XT_MATCH_HELPER=m
-CONFIG_NETFILTER_XT_MATCH_HL=m
 CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
 CONFIG_NETFILTER_XT_MATCH_IPVS=m
 CONFIG_NETFILTER_XT_MATCH_LENGTH=m
@@ -460,55 +168,29 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
-CONFIG_NETFILTER_XT_MATCH_SCTP=m
 CONFIG_NETFILTER_XT_MATCH_SOCKET=m
-CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
 CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
 CONFIG_NETFILTER_XT_MATCH_TIME=m
 CONFIG_NETFILTER_XT_MATCH_U32=m
-# CONFIG_IP_SET is not set
 CONFIG_IP_VS=m
 CONFIG_IP_VS_IPV6=y
-# CONFIG_IP_VS_DEBUG is not set
-CONFIG_IP_VS_TAB_BITS=12
-
-#
-# IPVS transport protocol load balancing support
-#
 CONFIG_IP_VS_PROTO_TCP=y
 CONFIG_IP_VS_PROTO_UDP=y
-CONFIG_IP_VS_PROTO_AH_ESP=y
 CONFIG_IP_VS_PROTO_ESP=y
 CONFIG_IP_VS_PROTO_AH=y
 CONFIG_IP_VS_PROTO_SCTP=y
-
-#
-# IPVS scheduler
-#
 CONFIG_IP_VS_RR=m
 CONFIG_IP_VS_WRR=m
 CONFIG_IP_VS_LC=m
 CONFIG_IP_VS_WLC=m
 CONFIG_IP_VS_LBLC=m
 CONFIG_IP_VS_LBLCR=m
-# CONFIG_IP_VS_DH is not set
-# CONFIG_IP_VS_SH is not set
 CONFIG_IP_VS_SED=m
 CONFIG_IP_VS_NQ=m
-
-#
-# IPVS application helper
-#
-# CONFIG_IP_VS_NFCT is not set
-# CONFIG_IP_VS_PE_SIP is not set
-
-#
-# IP: Netfilter Configuration
-#
-CONFIG_NF_DEFRAG_IPV4=y
-CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_NF_CONNTRACK_IPV4=m
 # CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
 CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=y
@@ -519,9 +201,7 @@ CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_TARGET_REJECT=y
 CONFIG_IP_NF_TARGET_LOG=m
 CONFIG_IP_NF_TARGET_ULOG=m
-# CONFIG_NF_NAT is not set
 CONFIG_IP_NF_MANGLE=m
-# CONFIG_IP_NF_TARGET_CLUSTERIP is not set
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
@@ -529,11 +209,6 @@ CONFIG_IP_NF_SECURITY=m
 CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
-
-#
-# IPv6: Netfilter Configuration
-#
-CONFIG_NF_DEFRAG_IPV6=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_IP6_NF_QUEUE=m
 CONFIG_IP6_NF_IPTABLES=m
@@ -574,57 +249,20 @@ CONFIG_BRIDGE_EBT_SNAT=m
 CONFIG_BRIDGE_EBT_LOG=m
 CONFIG_BRIDGE_EBT_ULOG=m
 CONFIG_BRIDGE_EBT_NFLOG=m
-# CONFIG_IP_DCCP is not set
-CONFIG_IP_SCTP=m
-# CONFIG_SCTP_DBG_MSG is not set
-# CONFIG_SCTP_DBG_OBJCNT is not set
-# CONFIG_SCTP_HMAC_NONE is not set
-# CONFIG_SCTP_HMAC_SHA1 is not set
-CONFIG_SCTP_HMAC_MD5=y
 CONFIG_RDS=m
 CONFIG_RDS_TCP=m
-# CONFIG_RDS_DEBUG is not set
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_L2TP is not set
-CONFIG_STP=m
-CONFIG_GARP=m
 CONFIG_BRIDGE=m
-CONFIG_BRIDGE_IGMP_SNOOPING=y
 CONFIG_NET_DSA=y
-CONFIG_NET_DSA_TAG_DSA=y
-CONFIG_NET_DSA_TAG_EDSA=y
-CONFIG_NET_DSA_TAG_TRAILER=y
-CONFIG_NET_DSA_MV88E6XXX=y
-CONFIG_NET_DSA_MV88E6060=y
-CONFIG_NET_DSA_MV88E6XXX_NEED_PPU=y
-CONFIG_NET_DSA_MV88E6131=y
-CONFIG_NET_DSA_MV88E6123_61_65=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
-# CONFIG_DECNET is not set
-CONFIG_LLC=m
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
 CONFIG_PHONET=m
-# CONFIG_IEEE802154 is not set
 CONFIG_NET_SCHED=y
-
-#
-# Queueing/Scheduling
-#
 CONFIG_NET_SCH_CBQ=m
 CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_HFSC=m
 CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_MULTIQ=m
 CONFIG_NET_SCH_RED=m
-# CONFIG_NET_SCH_SFB is not set
 CONFIG_NET_SCH_SFQ=m
 CONFIG_NET_SCH_TEQL=m
 CONFIG_NET_SCH_TBF=m
@@ -632,14 +270,7 @@ CONFIG_NET_SCH_GRED=m
 CONFIG_NET_SCH_DSMARK=m
 CONFIG_NET_SCH_NETEM=m
 CONFIG_NET_SCH_DRR=m
-# CONFIG_NET_SCH_MQPRIO is not set
-# CONFIG_NET_SCH_CHOKE is not set
 CONFIG_NET_SCH_INGRESS=m
-
-#
-# Classification
-#
-CONFIG_NET_CLS=y
 CONFIG_NET_CLS_BASIC=m
 CONFIG_NET_CLS_TCINDEX=m
 CONFIG_NET_CLS_ROUTE4=m
@@ -652,7 +283,6 @@ CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_FLOW=m
 CONFIG_NET_CLS_CGROUP=y
 CONFIG_NET_EMATCH=y
-CONFIG_NET_EMATCH_STACK=32
 CONFIG_NET_EMATCH_CMP=m
 CONFIG_NET_EMATCH_NBYTE=m
 CONFIG_NET_EMATCH_U32=m
@@ -668,307 +298,46 @@ CONFIG_NET_ACT_NAT=m
 CONFIG_NET_ACT_PEDIT=m
 CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
-# CONFIG_NET_ACT_CSUM is not set
 CONFIG_NET_CLS_IND=y
-CONFIG_NET_SCH_FIFO=y
 CONFIG_DCB=y
-CONFIG_DNS_RESOLVER=y
-# CONFIG_BATMAN_ADV is not set
-CONFIG_RPS=y
-CONFIG_RFS_ACCEL=y
-CONFIG_XPS=y
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_CAN is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_AF_RXRPC is not set
-CONFIG_FIB_RULES=y
 # CONFIG_WIRELESS is not set
-# CONFIG_WIMAX is not set
-# CONFIG_RFKILL is not set
-# CONFIG_NET_9P is not set
-# CONFIG_CAIF is not set
-# CONFIG_CEPH_LIB is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-CONFIG_FW_LOADER=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_EXTRA_FIRMWARE=""
-# CONFIG_DEBUG_DRIVER is not set
-# CONFIG_DEBUG_DEVRES is not set
-# CONFIG_SYS_HYPERVISOR is not set
 CONFIG_CONNECTOR=y
-CONFIG_PROC_EVENTS=y
-# CONFIG_MTD is not set
-# CONFIG_PARPORT is not set
-CONFIG_BLK_DEV=y
-# CONFIG_BLK_CPQ_DA is not set
-# CONFIG_BLK_CPQ_CISS_DA is not set
-# CONFIG_BLK_DEV_DAC960 is not set
-# CONFIG_BLK_DEV_UMEM is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_CRYPTOLOOP=m
-# CONFIG_BLK_DEV_DRBD is not set
-# CONFIG_BLK_DEV_NBD is not set
 CONFIG_BLK_DEV_SX8=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=16384
-# CONFIG_BLK_DEV_XIP is not set
-# CONFIG_CDROM_PKTCDVD is not set
-CONFIG_ATA_OVER_ETH=y
-# CONFIG_BLK_DEV_RBD is not set
-# CONFIG_SENSORS_LIS3LV02D is not set
-CONFIG_MISC_DEVICES=y
-# CONFIG_AD525X_DPOT is not set
-# CONFIG_PHANTOM is not set
-# CONFIG_SGI_IOC4 is not set
-# CONFIG_TIFM_CORE is not set
-# CONFIG_ICS932S401 is not set
-# CONFIG_ENCLOSURE_SERVICES is not set
-# CONFIG_HP_ILO is not set
-# CONFIG_APDS9802ALS is not set
-# CONFIG_ISL29003 is not set
-# CONFIG_ISL29020 is not set
-# CONFIG_SENSORS_TSL2550 is not set
-# CONFIG_SENSORS_BH1780 is not set
-# CONFIG_SENSORS_BH1770 is not set
-# CONFIG_SENSORS_APDS990X is not set
-# CONFIG_HMC6352 is not set
-# CONFIG_DS1682 is not set
-# CONFIG_BMP085 is not set
-# CONFIG_PCH_PHUB is not set
-# CONFIG_C2PORT is not set
-
-#
-# EEPROM support
-#
-# CONFIG_EEPROM_AT24 is not set
-# CONFIG_EEPROM_LEGACY is not set
-# CONFIG_EEPROM_MAX6875 is not set
-# CONFIG_EEPROM_93CX6 is not set
-# CONFIG_CB710_CORE is not set
-
-#
-# Texas Instruments shared transport line discipline
-#
-# CONFIG_SENSORS_LIS3_I2C is not set
-
-#
-# SCSI device support
-#
-CONFIG_SCSI_MOD=m
+CONFIG_ATA_OVER_ETH=m
 CONFIG_RAID_ATTRS=m
-CONFIG_SCSI=m
-CONFIG_SCSI_DMA=y
 CONFIG_SCSI_TGT=m
-# CONFIG_SCSI_NETLINK is not set
-CONFIG_SCSI_PROC_FS=y
-
-#
-# SCSI support type (disk, tape, CD-ROM)
-#
-CONFIG_BLK_DEV_SD=m
-# CONFIG_CHR_DEV_ST is not set
-# CONFIG_CHR_DEV_OSST is not set
-# CONFIG_BLK_DEV_SR is not set
-# CONFIG_CHR_DEV_SG is not set
-# CONFIG_CHR_DEV_SCH is not set
-# CONFIG_SCSI_MULTI_LUN is not set
+CONFIG_BLK_DEV_SD=y
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
-# CONFIG_SCSI_SCAN_ASYNC is not set
-CONFIG_SCSI_WAIT_SCAN=m
-
-#
-# SCSI Transports
-#
-# CONFIG_SCSI_SPI_ATTRS is not set
-# CONFIG_SCSI_FC_ATTRS is not set
-# CONFIG_SCSI_ISCSI_ATTRS is not set
-CONFIG_SCSI_SAS_ATTRS=m
-# CONFIG_SCSI_SAS_LIBSAS is not set
-# CONFIG_SCSI_SRP_ATTRS is not set
-CONFIG_SCSI_LOWLEVEL=y
-# CONFIG_ISCSI_TCP is not set
-# CONFIG_ISCSI_BOOT_SYSFS is not set
-# CONFIG_SCSI_CXGB3_ISCSI is not set
-# CONFIG_SCSI_CXGB4_ISCSI is not set
-# CONFIG_SCSI_BNX2_ISCSI is not set
-# CONFIG_SCSI_BNX2X_FCOE is not set
-# CONFIG_BE2ISCSI is not set
-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
-# CONFIG_SCSI_HPSA is not set
-# CONFIG_SCSI_3W_9XXX is not set
-# CONFIG_SCSI_3W_SAS is not set
-# CONFIG_SCSI_ACARD is not set
-# CONFIG_SCSI_AACRAID is not set
-# CONFIG_SCSI_AIC7XXX is not set
-# CONFIG_SCSI_AIC7XXX_OLD is not set
-# CONFIG_SCSI_AIC79XX is not set
-# CONFIG_SCSI_AIC94XX is not set
-# CONFIG_SCSI_MVSAS is not set
-# CONFIG_SCSI_DPT_I2O is not set
-# CONFIG_SCSI_ADVANSYS is not set
-# CONFIG_SCSI_ARCMSR is not set
-# CONFIG_MEGARAID_NEWGEN is not set
-# CONFIG_MEGARAID_LEGACY is not set
-# CONFIG_MEGARAID_SAS is not set
-# CONFIG_SCSI_MPT2SAS is not set
-# CONFIG_SCSI_HPTIOP is not set
-# CONFIG_LIBFC is not set
-# CONFIG_LIBFCOE is not set
-# CONFIG_FCOE is not set
-# CONFIG_SCSI_DMX3191D is not set
-# CONFIG_SCSI_FUTURE_DOMAIN is not set
-# CONFIG_SCSI_IPS is not set
-# CONFIG_SCSI_INITIO is not set
-# CONFIG_SCSI_INIA100 is not set
-# CONFIG_SCSI_STEX is not set
-# CONFIG_SCSI_SYM53C8XX_2 is not set
-# CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_1280 is not set
-# CONFIG_SCSI_QLA_FC is not set
-# CONFIG_SCSI_QLA_ISCSI is not set
-# CONFIG_SCSI_LPFC is not set
-# CONFIG_SCSI_DC395x is not set
-# CONFIG_SCSI_DC390T is not set
-# CONFIG_SCSI_DEBUG is not set
-# CONFIG_SCSI_PMCRAID is not set
-# CONFIG_SCSI_PM8001 is not set
-# CONFIG_SCSI_SRP is not set
-# CONFIG_SCSI_BFA_FC is not set
-# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
-# CONFIG_SCSI_DH is not set
-# CONFIG_SCSI_OSD_INITIATOR is not set
-CONFIG_ATA=m
-# CONFIG_ATA_NONSTANDARD is not set
-CONFIG_ATA_VERBOSE_ERROR=y
-CONFIG_SATA_PMP=y
-
-#
-# Controllers with non-SFF native interface
-#
-# CONFIG_SATA_AHCI is not set
-# CONFIG_SATA_AHCI_PLATFORM is not set
-# CONFIG_SATA_INIC162X is not set
-# CONFIG_SATA_ACARD_AHCI is not set
-CONFIG_SATA_SIL24=m
-CONFIG_ATA_SFF=y
-
-#
-# SFF controllers with custom DMA interface
-#
-# CONFIG_PDC_ADMA is not set
-# CONFIG_SATA_QSTOR is not set
-# CONFIG_SATA_SX4 is not set
-CONFIG_ATA_BMDMA=y
-
-#
-# SATA SFF controllers with BMDMA
-#
-# CONFIG_ATA_PIIX is not set
-# CONFIG_SATA_MV is not set
-# CONFIG_SATA_NV is not set
-# CONFIG_SATA_PROMISE is not set
-# CONFIG_SATA_SIL is not set
-# CONFIG_SATA_SIS is not set
-# CONFIG_SATA_SVW is not set
-# CONFIG_SATA_ULI is not set
-# CONFIG_SATA_VIA is not set
-# CONFIG_SATA_VITESSE is not set
-
-#
-# PATA SFF controllers with BMDMA
-#
-# CONFIG_PATA_ALI is not set
-# CONFIG_PATA_AMD is not set
-# CONFIG_PATA_ARASAN_CF is not set
-# CONFIG_PATA_ARTOP is not set
-# CONFIG_PATA_ATIIXP is not set
-# CONFIG_PATA_ATP867X is not set
-# CONFIG_PATA_CMD64X is not set
-# CONFIG_PATA_CS5520 is not set
-# CONFIG_PATA_CS5530 is not set
-# CONFIG_PATA_CS5536 is not set
-# CONFIG_PATA_CYPRESS is not set
-# CONFIG_PATA_EFAR is not set
-# CONFIG_PATA_HPT366 is not set
-# CONFIG_PATA_HPT37X is not set
-# CONFIG_PATA_HPT3X2N is not set
-# CONFIG_PATA_HPT3X3 is not set
-# CONFIG_PATA_IT8213 is not set
-# CONFIG_PATA_IT821X is not set
-# CONFIG_PATA_JMICRON is not set
-# CONFIG_PATA_MARVELL is not set
-# CONFIG_PATA_NETCELL is not set
-# CONFIG_PATA_NINJA32 is not set
-# CONFIG_PATA_NS87415 is not set
-# CONFIG_PATA_OLDPIIX is not set
-# CONFIG_PATA_OPTIDMA is not set
-# CONFIG_PATA_PDC2027X is not set
-# CONFIG_PATA_PDC_OLD is not set
-# CONFIG_PATA_RADISYS is not set
-# CONFIG_PATA_RDC is not set
-# CONFIG_PATA_SC1200 is not set
-# CONFIG_PATA_SCH is not set
-# CONFIG_PATA_SERVERWORKS is not set
-# CONFIG_PATA_SIL680 is not set
-# CONFIG_PATA_SIS is not set
-# CONFIG_PATA_TOSHIBA is not set
-# CONFIG_PATA_TRIFLEX is not set
-# CONFIG_PATA_VIA is not set
-# CONFIG_PATA_WINBOND is not set
-
-#
-# PIO-only SFF controllers
-#
-# CONFIG_PATA_CMD640_PCI is not set
-# CONFIG_PATA_MPIIX is not set
-# CONFIG_PATA_NS87410 is not set
-# CONFIG_PATA_OPTI is not set
-# CONFIG_PATA_PLATFORM is not set
-# CONFIG_PATA_RZ1000 is not set
-
-#
-# Generic fallback / legacy drivers
-#
-# CONFIG_ATA_GENERIC is not set
-# CONFIG_PATA_LEGACY is not set
+CONFIG_SCSI_SAS_ATA=y
+CONFIG_SCSI_MVSAS=y
+# CONFIG_SCSI_MVSAS_DEBUG is not set
+CONFIG_SCSI_MVSAS_TASKLET=y
+CONFIG_ATA=y
+CONFIG_SATA_SIL24=y
+# CONFIG_ATA_SFF is not set
 CONFIG_MD=y
 CONFIG_BLK_DEV_MD=y
-CONFIG_MD_AUTODETECT=y
 CONFIG_MD_LINEAR=m
 CONFIG_MD_RAID0=m
 CONFIG_MD_RAID1=m
 CONFIG_MD_RAID10=m
 CONFIG_MD_RAID456=m
 CONFIG_MULTICORE_RAID456=y
-# CONFIG_MD_MULTIPATH is not set
 CONFIG_MD_FAULTY=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_DEBUG=y
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_MIRROR=m
-# CONFIG_DM_RAID is not set
 CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
@@ -976,558 +345,143 @@ CONFIG_DM_MULTIPATH_QL=m
 CONFIG_DM_MULTIPATH_ST=m
 CONFIG_DM_DELAY=m
 CONFIG_DM_UEVENT=y
-# CONFIG_DM_FLAKEY is not set
-# CONFIG_TARGET_CORE is not set
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_FIREWIRE is not set
-# CONFIG_FIREWIRE_NOSY is not set
-# CONFIG_I2O is not set
+CONFIG_FUSION=y
+CONFIG_FUSION_SAS=y
 CONFIG_NETDEVICES=y
-CONFIG_IFB=m
-CONFIG_DUMMY=m
 CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_IFB=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
-# CONFIG_EQUALIZER is not set
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETPOLL_TRAP=y
 CONFIG_TUN=y
 CONFIG_VETH=m
-# CONFIG_ARCNET is not set
-# CONFIG_MII is not set
-CONFIG_PHYLIB=y
-
-#
-# MII PHY device drivers
-#
-# CONFIG_MARVELL_PHY is not set
-# CONFIG_DAVICOM_PHY is not set
-# CONFIG_QSEMI_PHY is not set
-# CONFIG_LXT_PHY is not set
-# CONFIG_CICADA_PHY is not set
-# CONFIG_VITESSE_PHY is not set
-# CONFIG_SMSC_PHY is not set
-# CONFIG_BROADCOM_PHY is not set
-# CONFIG_BCM63XX_PHY is not set
-# CONFIG_ICPLUS_PHY is not set
-# CONFIG_REALTEK_PHY is not set
-# CONFIG_NATIONAL_PHY is not set
-# CONFIG_STE10XP is not set
-# CONFIG_LSI_ET1011C_PHY is not set
-# CONFIG_MICREL_PHY is not set
-# CONFIG_FIXED_PHY is not set
-# CONFIG_MDIO_BITBANG is not set
-# CONFIG_NET_ETHERNET is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_ACENIC is not set
-# CONFIG_DL2K is not set
-# CONFIG_E1000 is not set
-CONFIG_E1000E=m
-# CONFIG_IP1000 is not set
-# CONFIG_IGB is not set
-# CONFIG_IGBVF is not set
-# CONFIG_NS83820 is not set
-# CONFIG_HAMACHI is not set
-# CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
-# CONFIG_SIS190 is not set
-# CONFIG_SKGE is not set
-# CONFIG_SKY2 is not set
-# CONFIG_VIA_VELOCITY is not set
-# CONFIG_TIGON3 is not set
-# CONFIG_BNX2 is not set
-# CONFIG_CNIC is not set
-# CONFIG_QLA3XXX is not set
-# CONFIG_ATL1 is not set
-# CONFIG_ATL1E is not set
-# CONFIG_ATL1C is not set
-# CONFIG_JME is not set
-# CONFIG_STMMAC_ETH is not set
-# CONFIG_PCH_GBE is not set
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_TR is not set
-# CONFIG_WLAN is not set
-
-#
-# Enable WiMAX (Networking options) to see the WiMAX drivers
-#
-# CONFIG_WAN is not set
-
-#
-# CAIF transport drivers
-#
+CONFIG_NET_DSA_MV88E6060=y
+CONFIG_NET_DSA_MV88E6131=y
+CONFIG_NET_DSA_MV88E6123_61_65=y
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_ALTEON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_EXAR is not set
+# CONFIG_NET_VENDOR_HP is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MELLANOX is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_PACKET_ENGINE is not set
+# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
 # CONFIG_TILE_NET is not set
-# CONFIG_FDDI is not set
-# CONFIG_HIPPI is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NET_FC is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-# CONFIG_VMXNET3 is not set
-# CONFIG_ISDN is not set
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-# CONFIG_INPUT_FF_MEMLESS is not set
-# CONFIG_INPUT_POLLDEV is not set
-# CONFIG_INPUT_SPARSEKMAP is not set
-
-#
-# Userland interfaces
-#
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_WLAN is not set
 # CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TABLET is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
 # CONFIG_SERIO is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
 # CONFIG_VT is not set
-CONFIG_UNIX98_PTYS=y
-# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-# CONFIG_NOZOMI is not set
-# CONFIG_N_GSM is not set
-CONFIG_DEVKMEM=y
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-# CONFIG_SERIAL_MFD_HSU is not set
-# CONFIG_SERIAL_JSM is not set
-# CONFIG_SERIAL_TIMBERDALE is not set
-# CONFIG_SERIAL_ALTERA_JTAGUART is not set
-# CONFIG_SERIAL_ALTERA_UART is not set
-# CONFIG_SERIAL_PCH_UART is not set
-# CONFIG_TTY_PRINTK is not set
-CONFIG_HVC_DRIVER=y
-# CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
 CONFIG_HW_RANDOM_TIMERIOMEM=m
-# CONFIG_R3964 is not set
-# CONFIG_APPLICOM is not set
-
-#
-# PCMCIA character devices
-#
-# CONFIG_RAW_DRIVER is not set
-# CONFIG_TCG_TPM is not set
-CONFIG_DEVPORT=y
-# CONFIG_RAMOOPS is not set
 CONFIG_I2C=y
-CONFIG_I2C_BOARDINFO=y
-CONFIG_I2C_COMPAT=y
 CONFIG_I2C_CHARDEV=y
-# CONFIG_I2C_MUX is not set
-CONFIG_I2C_HELPER_AUTO=y
-
-#
-# I2C Hardware Bus support
-#
-
-#
-# PC SMBus host controller drivers
-#
-# CONFIG_I2C_ALI1535 is not set
-# CONFIG_I2C_ALI1563 is not set
-# CONFIG_I2C_ALI15X3 is not set
-# CONFIG_I2C_AMD756 is not set
-# CONFIG_I2C_AMD8111 is not set
-# CONFIG_I2C_I801 is not set
-# CONFIG_I2C_ISCH is not set
-# CONFIG_I2C_PIIX4 is not set
-# CONFIG_I2C_NFORCE2 is not set
-# CONFIG_I2C_SIS5595 is not set
-# CONFIG_I2C_SIS630 is not set
-# CONFIG_I2C_SIS96X is not set
-# CONFIG_I2C_VIA is not set
-# CONFIG_I2C_VIAPRO is not set
-
-#
-# I2C system bus drivers (mostly embedded / system-on-chip)
-#
-# CONFIG_I2C_INTEL_MID is not set
-# CONFIG_I2C_OCORES is not set
-# CONFIG_I2C_PCA_PLATFORM is not set
-# CONFIG_I2C_PXA_PCI is not set
-# CONFIG_I2C_SIMTEC is not set
-# CONFIG_I2C_XILINX is not set
-# CONFIG_I2C_EG20T is not set
-
-#
-# External I2C/SMBus adapter drivers
-#
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_TAOS_EVM is not set
-
-#
-# Other I2C/SMBus bus drivers
-#
-# CONFIG_I2C_STUB is not set
-# CONFIG_I2C_DEBUG_CORE is not set
-# CONFIG_I2C_DEBUG_ALGO is not set
-# CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_SPI is not set
-
-#
-# PPS support
-#
-# CONFIG_PPS is not set
-
-#
-# PPS generators support
-#
-# CONFIG_W1 is not set
-# CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
-# CONFIG_THERMAL is not set
-# CONFIG_WATCHDOG is not set
-CONFIG_SSB_POSSIBLE=y
-
-#
-# Sonics Silicon Backplane
-#
-# CONFIG_SSB is not set
-CONFIG_MFD_SUPPORT=y
-# CONFIG_MFD_CORE is not set
-# CONFIG_MFD_88PM860X is not set
-# CONFIG_MFD_SM501 is not set
-# CONFIG_HTC_PASIC3 is not set
-# CONFIG_TPS6105X is not set
-# CONFIG_TPS6507X is not set
-# CONFIG_TWL4030_CORE is not set
-# CONFIG_MFD_STMPE is not set
-# CONFIG_MFD_TC3589X is not set
-# CONFIG_MFD_TMIO is not set
-# CONFIG_PMIC_DA903X is not set
-# CONFIG_PMIC_ADP5520 is not set
-# CONFIG_MFD_MAX8925 is not set
-# CONFIG_MFD_MAX8997 is not set
-# CONFIG_MFD_MAX8998 is not set
-# CONFIG_MFD_WM8400 is not set
-# CONFIG_MFD_WM831X_I2C is not set
-# CONFIG_MFD_WM8350_I2C is not set
-# CONFIG_MFD_WM8994 is not set
-# CONFIG_MFD_PCF50633 is not set
-# CONFIG_ABX500_CORE is not set
-# CONFIG_LPC_SCH is not set
-# CONFIG_MFD_RDC321X is not set
-# CONFIG_MFD_JANZ_CMODIO is not set
-# CONFIG_MFD_VX855 is not set
-# CONFIG_MFD_WL1273_CORE is not set
-# CONFIG_REGULATOR is not set
-# CONFIG_MEDIA_SUPPORT is not set
-
-#
-# Graphics support
-#
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
 # CONFIG_VGA_ARB is not set
-# CONFIG_DRM is not set
-# CONFIG_STUB_POULSBO is not set
-# CONFIG_VGASTATE is not set
-# CONFIG_VIDEO_OUTPUT_CONTROL is not set
-# CONFIG_FB is not set
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
-
-#
-# Display device support
-#
-# CONFIG_DISPLAY_SUPPORT is not set
-# CONFIG_SOUND is not set
 # CONFIG_HID_SUPPORT is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_UWB is not set
-# CONFIG_MMC is not set
-# CONFIG_MEMSTICK is not set
-# CONFIG_NEW_LEDS is not set
-# CONFIG_NFC_DEVICES is not set
-# CONFIG_ACCESSIBILITY is not set
-# CONFIG_INFINIBAND is not set
-# CONFIG_EDAC is not set
-CONFIG_RTC_LIB=y
+CONFIG_USB=y
+# CONFIG_USB_DEVICE_CLASS is not set
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_LIBUSUAL=y
+CONFIG_EDAC=y
+CONFIG_EDAC_MM_EDAC=y
 CONFIG_RTC_CLASS=y
-CONFIG_RTC_HCTOSYS=y
-CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
-# CONFIG_RTC_DEBUG is not set
-
-#
-# RTC interfaces
-#
-CONFIG_RTC_INTF_SYSFS=y
-CONFIG_RTC_INTF_PROC=y
-CONFIG_RTC_INTF_DEV=y
-# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
-# CONFIG_RTC_DRV_TEST is not set
-
-#
-# I2C RTC drivers
-#
-# CONFIG_RTC_DRV_DS1307 is not set
-# CONFIG_RTC_DRV_DS1374 is not set
-# CONFIG_RTC_DRV_DS1672 is not set
-# CONFIG_RTC_DRV_DS3232 is not set
-# CONFIG_RTC_DRV_MAX6900 is not set
-# CONFIG_RTC_DRV_RS5C372 is not set
-# CONFIG_RTC_DRV_ISL1208 is not set
-# CONFIG_RTC_DRV_ISL12022 is not set
-# CONFIG_RTC_DRV_X1205 is not set
-# CONFIG_RTC_DRV_PCF8563 is not set
-# CONFIG_RTC_DRV_PCF8583 is not set
-# CONFIG_RTC_DRV_M41T80 is not set
-# CONFIG_RTC_DRV_BQ32K is not set
-# CONFIG_RTC_DRV_S35390A is not set
-# CONFIG_RTC_DRV_FM3130 is not set
-# CONFIG_RTC_DRV_RX8581 is not set
-# CONFIG_RTC_DRV_RX8025 is not set
-
-#
-# SPI RTC drivers
-#
-
-#
-# Platform RTC drivers
-#
-# CONFIG_RTC_DRV_DS1286 is not set
-# CONFIG_RTC_DRV_DS1511 is not set
-# CONFIG_RTC_DRV_DS1553 is not set
-# CONFIG_RTC_DRV_DS1742 is not set
-# CONFIG_RTC_DRV_STK17TA8 is not set
-# CONFIG_RTC_DRV_M48T86 is not set
-# CONFIG_RTC_DRV_M48T35 is not set
-# CONFIG_RTC_DRV_M48T59 is not set
-# CONFIG_RTC_DRV_MSM6242 is not set
-# CONFIG_RTC_DRV_BQ4802 is not set
-# CONFIG_RTC_DRV_RP5C01 is not set
-# CONFIG_RTC_DRV_V3020 is not set
-
-#
-# on-CPU RTC drivers
-#
 CONFIG_RTC_DRV_TILE=y
-# CONFIG_DMADEVICES is not set
-# CONFIG_AUXDISPLAY is not set
-# CONFIG_UIO is not set
-# CONFIG_STAGING is not set
-
-#
-# File systems
-#
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
 CONFIG_EXT2_FS_XIP=y
 CONFIG_EXT3_FS=y
-CONFIG_EXT3_DEFAULTS_TO_ORDERED=y
-CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
 CONFIG_EXT4_FS=y
-CONFIG_EXT4_FS_XATTR=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
-# CONFIG_EXT4_DEBUG is not set
-CONFIG_FS_XIP=y
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-CONFIG_JBD2=y
-CONFIG_JBD2_DEBUG=y
-CONFIG_FS_MBCACHE=y
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-CONFIG_XFS_FS=m
+CONFIG_XFS_FS=y
 CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
-# CONFIG_XFS_RT is not set
-# CONFIG_XFS_DEBUG is not set
 CONFIG_GFS2_FS=m
 CONFIG_GFS2_FS_LOCKING_DLM=y
-# CONFIG_OCFS2_FS is not set
 CONFIG_BTRFS_FS=m
 CONFIG_BTRFS_FS_POSIX_ACL=y
-# CONFIG_NILFS2_FS is not set
-CONFIG_FS_POSIX_ACL=y
-CONFIG_EXPORTFS=y
-CONFIG_FILE_LOCKING=y
-CONFIG_FSNOTIFY=y
-CONFIG_DNOTIFY=y
-CONFIG_INOTIFY_USER=y
-# CONFIG_FANOTIFY is not set
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-# CONFIG_QUOTA_DEBUG is not set
-CONFIG_QUOTA_TREE=y
-# CONFIG_QFMT_V1 is not set
 CONFIG_QFMT_V2=y
-CONFIG_QUOTACTL=y
-# CONFIG_AUTOFS4_FS is not set
+CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
-CONFIG_GENERIC_ACL=y
-
-#
-# Caches
-#
 CONFIG_FSCACHE=m
 CONFIG_FSCACHE_STATS=y
-# CONFIG_FSCACHE_HISTOGRAM is not set
-# CONFIG_FSCACHE_DEBUG is not set
-# CONFIG_FSCACHE_OBJECT_LIST is not set
 CONFIG_CACHEFILES=m
-# CONFIG_CACHEFILES_DEBUG is not set
-# CONFIG_CACHEFILES_HISTOGRAM is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
 CONFIG_UDF_FS=m
-CONFIG_UDF_NLS=y
-
-#
-# DOS/FAT/NT Filesystems
-#
-CONFIG_FAT_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
-CONFIG_FAT_DEFAULT_CODEPAGE=437
 CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
-CONFIG_PROC_SYSCTL=y
-CONFIG_PROC_PAGE_MONITOR=y
-CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
-CONFIG_HUGETLB_PAGE=y
-CONFIG_CONFIGFS_FS=m
-CONFIG_MISC_FILESYSTEMS=y
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
 CONFIG_ECRYPT_FS=m
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_LOGFS is not set
 CONFIG_CRAMFS=m
 CONFIG_SQUASHFS=m
-# CONFIG_SQUASHFS_XATTR is not set
-# CONFIG_SQUASHFS_LZO is not set
-# CONFIG_SQUASHFS_XZ is not set
-# CONFIG_SQUASHFS_EMBEDDED is not set
-CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3
-# CONFIG_VXFS_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_OMFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_ROMFS_FS is not set
-# CONFIG_PSTORE is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_V4_1=y
-CONFIG_PNFS_FILE_LAYOUT=m
 CONFIG_NFS_FSCACHE=y
-# CONFIG_NFS_USE_LEGACY_DNS is not set
-CONFIG_NFS_USE_KERNEL_DNS=y
-# CONFIG_NFS_USE_NEW_IDMAPPER is not set
 CONFIG_NFSD=m
-CONFIG_NFSD_V2_ACL=y
-CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
-CONFIG_LOCKD=m
-CONFIG_LOCKD_V4=y
-CONFIG_NFS_ACL_SUPPORT=m
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=m
-CONFIG_SUNRPC_GSS=m
-CONFIG_RPCSEC_GSS_KRB5=m
-# CONFIG_CEPH_FS is not set
 CONFIG_CIFS=m
 CONFIG_CIFS_STATS=y
-# CONFIG_CIFS_STATS2 is not set
 CONFIG_CIFS_WEAK_PW_HASH=y
 CONFIG_CIFS_UPCALL=y
 CONFIG_CIFS_XATTR=y
 CONFIG_CIFS_POSIX=y
-# CONFIG_CIFS_DEBUG2 is not set
 CONFIG_CIFS_DFS_UPCALL=y
 CONFIG_CIFS_FSCACHE=y
-# CONFIG_CIFS_ACL is not set
-CONFIG_CIFS_EXPERIMENTAL=y
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-
-#
-# Partition Types
-#
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-CONFIG_OSF_PARTITION=y
-CONFIG_AMIGA_PARTITION=y
-# CONFIG_ATARI_PARTITION is not set
-CONFIG_MAC_PARTITION=y
-CONFIG_MSDOS_PARTITION=y
-CONFIG_BSD_DISKLABEL=y
-CONFIG_MINIX_SUBPARTITION=y
-CONFIG_SOLARIS_X86_PARTITION=y
-CONFIG_UNIXWARE_DISKLABEL=y
-# CONFIG_LDM_PARTITION is not set
-CONFIG_SGI_PARTITION=y
-# CONFIG_ULTRIX_PARTITION is not set
-CONFIG_SUN_PARTITION=y
-CONFIG_KARMA_PARTITION=y
-CONFIG_EFI_PARTITION=y
-# CONFIG_SYSV68_PARTITION is not set
-CONFIG_NLS=y
 CONFIG_NLS_DEFAULT="utf8"
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_737=m
@@ -1567,185 +521,47 @@ CONFIG_NLS_ISO8859_15=m
 CONFIG_NLS_KOI8_R=m
 CONFIG_NLS_KOI8_U=m
 CONFIG_NLS_UTF8=m
-CONFIG_DLM=m
 CONFIG_DLM_DEBUG=y
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_DEFAULT_MESSAGE_LOGLEVEL=4
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_ENABLE_MUST_CHECK=y
-CONFIG_FRAME_WARN=2048
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_STRIP_ASM_SYMS=y
-# CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_DEBUG_FS=y
 CONFIG_HEADERS_CHECK=y
-# CONFIG_DEBUG_SECTION_MISMATCH is not set
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_SHIRQ=y
 CONFIG_LOCKUP_DETECTOR=y
-# CONFIG_HARDLOCKUP_DETECTOR is not set
-# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set
-CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0
-# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
-CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
-CONFIG_SCHED_DEBUG=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
-# CONFIG_DEBUG_OBJECTS is not set
-# CONFIG_SLUB_DEBUG_ON is not set
-# CONFIG_SLUB_STATS is not set
-# CONFIG_DEBUG_KMEMLEAK is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_MUTEXES is not set
-# CONFIG_DEBUG_LOCK_ALLOC is not set
-# CONFIG_PROVE_LOCKING is not set
-# CONFIG_SPARSE_RCU_POINTER is not set
-# CONFIG_LOCK_STAT is not set
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
-# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
-CONFIG_STACKTRACE=y
-# CONFIG_DEBUG_KOBJECT is not set
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_REDUCED=y
 CONFIG_DEBUG_VM=y
-# CONFIG_DEBUG_WRITECOUNT is not set
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_DEBUG_LIST=y
-# CONFIG_TEST_LIST_SORT is not set
-# CONFIG_DEBUG_SG is not set
-# CONFIG_DEBUG_NOTIFIERS is not set
 CONFIG_DEBUG_CREDENTIALS=y
-# CONFIG_RCU_TORTURE_TEST is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-# CONFIG_BACKTRACE_SELF_TEST is not set
-# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
-# CONFIG_LKDTM is not set
-# CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
-# CONFIG_DEBUG_PAGEALLOC is not set
-CONFIG_TRACING_SUPPORT=y
-CONFIG_FTRACE=y
-# CONFIG_IRQSOFF_TRACER is not set
-# CONFIG_SCHED_TRACER is not set
-# CONFIG_ENABLE_DEFAULT_TRACERS is not set
-CONFIG_BRANCH_PROFILE_NONE=y
-# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set
-# CONFIG_PROFILE_ALL_BRANCHES is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
-# CONFIG_BUILD_DOCSRC is not set
 CONFIG_DYNAMIC_DEBUG=y
-# CONFIG_ATOMIC64_SELFTEST is not set
 CONFIG_ASYNC_RAID6_TEST=m
-# CONFIG_SAMPLES is not set
-# CONFIG_TEST_KSTRTOX is not set
-CONFIG_EARLY_PRINTK=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-# CONFIG_DEBUG_STACK_USAGE is not set
-CONFIG_DEBUG_EXTRA_FLAGS=""
-
-#
-# Security options
-#
-CONFIG_KEYS=y
 CONFIG_KEYS_DEBUG_PROC_KEYS=y
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
 CONFIG_SECURITY=y
 CONFIG_SECURITYFS=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_NETWORK_XFRM=y
-# CONFIG_SECURITY_PATH is not set
-CONFIG_LSM_MMAP_MIN_ADDR=65536
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
 CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_SECURITY_SELINUX_DEVELOP=y
-CONFIG_SECURITY_SELINUX_AVC_STATS=y
-CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
-# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
-# CONFIG_SECURITY_SMACK is not set
-# CONFIG_SECURITY_TOMOYO is not set
-# CONFIG_SECURITY_APPARMOR is not set
-# CONFIG_IMA is not set
-CONFIG_DEFAULT_SECURITY_SELINUX=y
-# CONFIG_DEFAULT_SECURITY_DAC is not set
-CONFIG_DEFAULT_SECURITY="selinux"
-CONFIG_XOR_BLOCKS=m
-CONFIG_ASYNC_CORE=m
-CONFIG_ASYNC_MEMCPY=m
-CONFIG_ASYNC_XOR=m
-CONFIG_ASYNC_PQ=m
-CONFIG_ASYNC_RAID6_RECOV=m
-CONFIG_CRYPTO=y
-
-#
-# Crypto core or helper
-#
-CONFIG_CRYPTO_ALGAPI=y
-CONFIG_CRYPTO_ALGAPI2=y
-CONFIG_CRYPTO_AEAD=m
-CONFIG_CRYPTO_AEAD2=y
-CONFIG_CRYPTO_BLKCIPHER=m
-CONFIG_CRYPTO_BLKCIPHER2=y
-CONFIG_CRYPTO_HASH=y
-CONFIG_CRYPTO_HASH2=y
-CONFIG_CRYPTO_RNG=m
-CONFIG_CRYPTO_RNG2=y
-CONFIG_CRYPTO_PCOMP=m
-CONFIG_CRYPTO_PCOMP2=y
-CONFIG_CRYPTO_MANAGER=y
-CONFIG_CRYPTO_MANAGER2=y
-CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y
-CONFIG_CRYPTO_GF128MUL=m
 CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_PCRYPT=m
-CONFIG_CRYPTO_WORKQUEUE=y
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_AUTHENC=m
 CONFIG_CRYPTO_TEST=m
-
-#
-# Authenticated Encryption with Associated Data
-#
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
-CONFIG_CRYPTO_SEQIV=m
-
-#
-# Block modes
-#
-CONFIG_CRYPTO_CBC=m
-CONFIG_CRYPTO_CTR=m
 CONFIG_CRYPTO_CTS=m
-CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_XTS=m
-
-#
-# Hash modes
-#
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
-
-#
-# Digest
-#
 CONFIG_CRYPTO_CRC32C=y
-CONFIG_CRYPTO_GHASH=m
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
@@ -1756,76 +572,16 @@ CONFIG_CRYPTO_SHA256=m
 CONFIG_CRYPTO_SHA512=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
-
-#
-# Ciphers
-#
-CONFIG_CRYPTO_AES=m
 CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
-# CONFIG_CRYPTO_SALSA20 is not set
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_TWOFISH_COMMON=m
-
-#
-# Compression
-#
-CONFIG_CRYPTO_DEFLATE=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
-
-#
-# Random Number Generation
-#
-CONFIG_CRYPTO_ANSI_CPRNG=m
-# CONFIG_CRYPTO_USER_API_HASH is not set
-# CONFIG_CRYPTO_USER_API_SKCIPHER is not set
-CONFIG_CRYPTO_HW=y
-CONFIG_CRYPTO_DEV_HIFN_795X=m
-CONFIG_CRYPTO_DEV_HIFN_795X_RNG=y
-# CONFIG_BINARY_PRINTF is not set
-
-#
-# Library routines
-#
-CONFIG_RAID6_PQ=m
-CONFIG_BITREVERSE=y
-CONFIG_GENERIC_FIND_FIRST_BIT=y
-CONFIG_GENERIC_FIND_NEXT_BIT=y
-CONFIG_GENERIC_FIND_LAST_BIT=y
-# CONFIG_CRC_CCITT is not set
-CONFIG_CRC16=y
-CONFIG_CRC_T10DIF=y
-CONFIG_CRC_ITU_T=m
-CONFIG_CRC32=y
-# CONFIG_CRC7 is not set
-CONFIG_LIBCRC32C=m
-CONFIG_AUDIT_GENERIC=y
-CONFIG_ZLIB_INFLATE=y
-CONFIG_ZLIB_DEFLATE=m
-CONFIG_LZO_COMPRESS=m
-CONFIG_LZO_DECOMPRESS=m
-# CONFIG_XZ_DEC is not set
-# CONFIG_XZ_DEC_BCJ is not set
-CONFIG_DECOMPRESS_GZIP=y
-CONFIG_TEXTSEARCH=y
-CONFIG_TEXTSEARCH_KMP=m
-CONFIG_TEXTSEARCH_BM=m
-CONFIG_TEXTSEARCH_FSM=m
-CONFIG_HAS_IOMEM=y
-CONFIG_HAS_IOPORT=y
-CONFIG_HAS_DMA=y
-CONFIG_CPU_RMAP=y
-CONFIG_NLATTR=y
-# CONFIG_AVERAGE is not set
-# CONFIG_VIRTUALIZATION is not set
index 6f05f96..2b1fd31 100644 (file)
-#
-# Automatically generated make config: don't edit
-# Linux/tile 2.6.39-rc5 Kernel Configuration
-# Tue May  3 09:15:02 2011
-#
-CONFIG_TILE=y
-CONFIG_MMU=y
-CONFIG_GENERIC_CSUM=y
-CONFIG_SEMAPHORE_SLEEPERS=y
-CONFIG_HAVE_ARCH_ALLOC_REMAP=y
-CONFIG_HAVE_SETUP_PER_CPU_AREA=y
-CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
-CONFIG_SYS_SUPPORTS_HUGETLBFS=y
-CONFIG_GENERIC_CLOCKEVENTS=y
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-CONFIG_DEFAULT_MIGRATION_COST=10000000
-CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
-CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
-CONFIG_ARCH_DMA_ADDR_T_64BIT=y
-CONFIG_LOCKDEP_SUPPORT=y
-CONFIG_STACKTRACE_SUPPORT=y
-CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
-CONFIG_TRACE_IRQFLAGS_SUPPORT=y
-CONFIG_STRICT_DEVMEM=y
-CONFIG_SMP=y
-# CONFIG_DEBUG_COPY_FROM_USER is not set
-CONFIG_HVC_TILE=y
-# CONFIG_TILEGX is not set
-CONFIG_ARCH_DEFCONFIG="arch/tile/configs/tile_defconfig"
-CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
-CONFIG_CONSTRUCTORS=y
-
-#
-# General setup
-#
 CONFIG_EXPERIMENTAL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-CONFIG_CROSS_COMPILE=""
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-# CONFIG_SWAP is not set
+# CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
-CONFIG_SYSVIPC_SYSCTL=y
-# CONFIG_POSIX_MQUEUE is not set
-# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
 CONFIG_FHANDLE=y
-# CONFIG_TASKSTATS is not set
-# CONFIG_AUDIT is not set
-CONFIG_HAVE_GENERIC_HARDIRQS=y
-
-#
-# IRQ subsystem
-#
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_IRQ_SHOW=y
-CONFIG_GENERIC_PENDING_IRQ=y
-
-#
-# RCU Subsystem
-#
-CONFIG_TREE_RCU=y
-# CONFIG_PREEMPT_RCU is not set
-# CONFIG_RCU_TRACE is not set
-CONFIG_RCU_FANOUT=32
-# CONFIG_RCU_FANOUT_EXACT is not set
-# CONFIG_RCU_FAST_NO_HZ is not set
-# CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_IKCONFIG is not set
-CONFIG_LOG_BUF_SHIFT=17
-# CONFIG_CGROUPS is not set
-# CONFIG_NAMESPACES is not set
-# CONFIG_SCHED_AUTOGROUP is not set
-# CONFIG_SYSFS_DEPRECATED is not set
-# CONFIG_RELAY is not set
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_AUDIT=y
+CONFIG_LOG_BUF_SHIFT=19
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_MEM_RES_CTLR=y
+CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
+CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="usr/contents.txt"
-CONFIG_INITRAMFS_ROOT_UID=0
-CONFIG_INITRAMFS_ROOT_GID=0
-CONFIG_RD_GZIP=y
-# CONFIG_RD_BZIP2 is not set
-# CONFIG_RD_LZMA is not set
-# CONFIG_RD_XZ is not set
-# CONFIG_RD_LZO is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
-# CONFIG_INITRAMFS_COMPRESSION_GZIP is not set
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL=y
-CONFIG_ANON_INODES=y
-CONFIG_EXPERT=y
 CONFIG_SYSCTL_SYSCALL=y
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_ALL is not set
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
-CONFIG_SHMEM=y
-CONFIG_AIO=y
 CONFIG_EMBEDDED=y
-
-#
-# Kernel Performance Events And Counters
-#
-CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_PCI_QUIRKS=y
-CONFIG_SLUB_DEBUG=y
 # CONFIG_COMPAT_BRK is not set
-# CONFIG_SLAB is not set
-CONFIG_SLUB=y
-# CONFIG_SLOB is not set
 CONFIG_PROFILING=y
-CONFIG_USE_GENERIC_SMP_HELPERS=y
-
-#
-# GCOV-based kernel profiling
-#
-# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
-CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
-CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
-# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
-# CONFIG_MODULE_FORCE_UNLOAD is not set
-# CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_STOP_MACHINE=y
-CONFIG_BLOCK=y
-CONFIG_LBDAF=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_BLK_DEV_INTEGRITY is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_DEFAULT_NOOP=y
-CONFIG_DEFAULT_IOSCHED="noop"
-# CONFIG_INLINE_SPIN_TRYLOCK is not set
-# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set
-# CONFIG_INLINE_SPIN_LOCK is not set
-# CONFIG_INLINE_SPIN_LOCK_BH is not set
-# CONFIG_INLINE_SPIN_LOCK_IRQ is not set
-# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set
-CONFIG_INLINE_SPIN_UNLOCK=y
-# CONFIG_INLINE_SPIN_UNLOCK_BH is not set
-CONFIG_INLINE_SPIN_UNLOCK_IRQ=y
-# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set
-# CONFIG_INLINE_READ_TRYLOCK is not set
-# CONFIG_INLINE_READ_LOCK is not set
-# CONFIG_INLINE_READ_LOCK_BH is not set
-# CONFIG_INLINE_READ_LOCK_IRQ is not set
-# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set
-CONFIG_INLINE_READ_UNLOCK=y
-# CONFIG_INLINE_READ_UNLOCK_BH is not set
-CONFIG_INLINE_READ_UNLOCK_IRQ=y
-# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set
-# CONFIG_INLINE_WRITE_TRYLOCK is not set
-# CONFIG_INLINE_WRITE_LOCK is not set
-# CONFIG_INLINE_WRITE_LOCK_BH is not set
-# CONFIG_INLINE_WRITE_LOCK_IRQ is not set
-# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set
-CONFIG_INLINE_WRITE_UNLOCK=y
-# CONFIG_INLINE_WRITE_UNLOCK_BH is not set
-CONFIG_INLINE_WRITE_UNLOCK_IRQ=y
-# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set
-CONFIG_MUTEX_SPIN_ON_OWNER=y
-
-#
-# Tilera-specific configuration
-#
-CONFIG_NR_CPUS=64
-CONFIG_TICK_ONESHOT=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_CFQ_GROUP_IOSCHED=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
-CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
-# CONFIG_HZ_300 is not set
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
-CONFIG_SCHED_HRTICK=y
-# CONFIG_KEXEC is not set
-CONFIG_HIGHMEM=y
-CONFIG_NUMA=y
-CONFIG_NODES_SHIFT=2
-# CONFIG_VMSPLIT_3_75G is not set
-# CONFIG_VMSPLIT_3_5G is not set
-CONFIG_VMSPLIT_3G=y
-# CONFIG_VMSPLIT_2_75G is not set
-# CONFIG_VMSPLIT_2_5G is not set
-# CONFIG_VMSPLIT_2_25G is not set
-# CONFIG_VMSPLIT_2G is not set
-# CONFIG_VMSPLIT_1G is not set
-CONFIG_PAGE_OFFSET=0xC0000000
-CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_DISCONTIGMEM_MANUAL=y
-CONFIG_DISCONTIGMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-CONFIG_NEED_MULTIPLE_NODES=y
-CONFIG_PAGEFLAGS_EXTENDED=y
-CONFIG_SPLIT_PTLOCK_CPUS=4
-# CONFIG_COMPACTION is not set
-CONFIG_MIGRATION=y
-CONFIG_PHYS_ADDR_T_64BIT=y
-CONFIG_ZONE_DMA_FLAG=0
-CONFIG_BOUNCE=y
-CONFIG_VIRT_TO_BUS=y
-# CONFIG_KSM is not set
-CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
-# CONFIG_CMDLINE_BOOL is not set
-CONFIG_VMALLOC_RESERVE=0x1000000
-CONFIG_HARDWALL=y
-CONFIG_KERNEL_PL=1
-
-#
-# Bus options
-#
-CONFIG_PCI=y
-CONFIG_PCI_DOMAINS=y
-# CONFIG_NO_IOMEM is not set
-# CONFIG_NO_IOPORT is not set
-# CONFIG_ARCH_SUPPORTS_MSI is not set
-# CONFIG_PCI_DEBUG is not set
-# CONFIG_PCI_STUB is not set
-# CONFIG_PCI_IOV is not set
-# CONFIG_HOTPLUG_PCI is not set
-
-#
-# Executable file formats
-#
-CONFIG_KCORE_ELF=y
-CONFIG_BINFMT_ELF=y
+CONFIG_PCI_DEBUG=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-# CONFIG_HAVE_AOUT is not set
-# CONFIG_BINFMT_MISC is not set
+CONFIG_BINFMT_MISC=y
 CONFIG_NET=y
-
-#
-# Networking options
-#
 CONFIG_PACKET=y
 CONFIG_UNIX=y
-CONFIG_XFRM=y
-# CONFIG_XFRM_USER is not set
-# CONFIG_XFRM_SUB_POLICY is not set
-# CONFIG_XFRM_MIGRATE is not set
-# CONFIG_XFRM_STATISTICS is not set
-# CONFIG_NET_KEY is not set
+CONFIG_XFRM_USER=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_KEY=m
+CONFIG_NET_KEY_MIGRATE=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE_DEMUX is not set
-# CONFIG_IP_MROUTE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_XFRM_TUNNEL is not set
-CONFIG_INET_TUNNEL=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-CONFIG_INET_XFRM_MODE_BEET=y
-# CONFIG_INET_LRO is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_CUBIC=y
-CONFIG_DEFAULT_TCP_CONG="cubic"
-# CONFIG_TCP_MD5SIG is not set
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_TCP_MD5SIG=y
 CONFIG_IPV6=y
-# CONFIG_IPV6_PRIVACY is not set
-# CONFIG_IPV6_ROUTER_PREF is not set
-# CONFIG_IPV6_OPTIMISTIC_DAD is not set
-# CONFIG_INET6_AH is not set
-# CONFIG_INET6_ESP is not set
-# CONFIG_INET6_IPCOMP is not set
-# CONFIG_IPV6_MIP6 is not set
-# CONFIG_INET6_XFRM_TUNNEL is not set
-# CONFIG_INET6_TUNNEL is not set
-CONFIG_INET6_XFRM_MODE_TRANSPORT=y
-CONFIG_INET6_XFRM_MODE_TUNNEL=y
-CONFIG_INET6_XFRM_MODE_BEET=y
-# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
-CONFIG_IPV6_SIT=y
-# CONFIG_IPV6_SIT_6RD is not set
-CONFIG_IPV6_NDISC_NODETYPE=y
-# CONFIG_IPV6_TUNNEL is not set
-# CONFIG_IPV6_MULTIPLE_TABLES is not set
-# CONFIG_IPV6_MROUTE is not set
-# CONFIG_NETWORK_SECMARK is not set
-# CONFIG_NETWORK_PHY_TIMESTAMPING is not set
-# CONFIG_NETFILTER is not set
-# CONFIG_IP_DCCP is not set
-# CONFIG_IP_SCTP is not set
-# CONFIG_RDS is not set
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_L2TP is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_NET_DSA is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-# CONFIG_PHONET is not set
-# CONFIG_IEEE802154 is not set
-# CONFIG_NET_SCHED is not set
-# CONFIG_DCB is not set
-# CONFIG_BATMAN_ADV is not set
-CONFIG_RPS=y
-CONFIG_RFS_ACCEL=y
-CONFIG_XPS=y
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_CAN is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_AF_RXRPC is not set
+CONFIG_IPV6_PRIVACY=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NETFILTER_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_IPV6=y
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_PROTO_SCTP=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_IP6_NF_QUEUE=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_IP6=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_BRIDGE_EBT_ULOG=m
+CONFIG_BRIDGE_EBT_NFLOG=m
+CONFIG_RDS=m
+CONFIG_RDS_TCP=m
+CONFIG_BRIDGE=m
+CONFIG_NET_DSA=y
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_PHONET=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_CLS_IND=y
+CONFIG_DCB=y
 # CONFIG_WIRELESS is not set
-# CONFIG_WIMAX is not set
-# CONFIG_RFKILL is not set
-# CONFIG_NET_9P is not set
-# CONFIG_CAIF is not set
-# CONFIG_CEPH_LIB is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_DEVTMPFS is not set
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-CONFIG_FW_LOADER=y
-CONFIG_FIRMWARE_IN_KERNEL=y
-CONFIG_EXTRA_FIRMWARE=""
-# CONFIG_DEBUG_DRIVER is not set
-# CONFIG_DEBUG_DEVRES is not set
-# CONFIG_SYS_HYPERVISOR is not set
-# CONFIG_CONNECTOR is not set
-# CONFIG_MTD is not set
-# CONFIG_PARPORT is not set
-CONFIG_BLK_DEV=y
-# CONFIG_BLK_CPQ_DA is not set
-# CONFIG_BLK_CPQ_CISS_DA is not set
-# CONFIG_BLK_DEV_DAC960 is not set
-# CONFIG_BLK_DEV_UMEM is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-# CONFIG_BLK_DEV_LOOP is not set
-
-#
-# DRBD disabled because PROC_FS, INET or CONNECTOR not selected
-#
-# CONFIG_BLK_DEV_NBD is not set
-# CONFIG_BLK_DEV_SX8 is not set
-# CONFIG_BLK_DEV_RAM is not set
-# CONFIG_CDROM_PKTCDVD is not set
-# CONFIG_ATA_OVER_ETH is not set
-# CONFIG_BLK_DEV_RBD is not set
-# CONFIG_SENSORS_LIS3LV02D is not set
-CONFIG_MISC_DEVICES=y
-# CONFIG_PHANTOM is not set
-# CONFIG_SGI_IOC4 is not set
-# CONFIG_TIFM_CORE is not set
-# CONFIG_ENCLOSURE_SERVICES is not set
-# CONFIG_HP_ILO is not set
-# CONFIG_PCH_PHUB is not set
-# CONFIG_C2PORT is not set
-
-#
-# EEPROM support
-#
-# CONFIG_EEPROM_93CX6 is not set
-# CONFIG_CB710_CORE is not set
-
-#
-# Texas Instruments shared transport line discipline
-#
-
-#
-# SCSI device support
-#
-CONFIG_SCSI_MOD=y
-# CONFIG_RAID_ATTRS is not set
-CONFIG_SCSI=y
-CONFIG_SCSI_DMA=y
-# CONFIG_SCSI_TGT is not set
-# CONFIG_SCSI_NETLINK is not set
-CONFIG_SCSI_PROC_FS=y
-
-#
-# SCSI support type (disk, tape, CD-ROM)
-#
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_SX8=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_ATA_OVER_ETH=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI_TGT=m
 CONFIG_BLK_DEV_SD=y
-# CONFIG_CHR_DEV_ST is not set
-# CONFIG_CHR_DEV_OSST is not set
-# CONFIG_BLK_DEV_SR is not set
-# CONFIG_CHR_DEV_SG is not set
-# CONFIG_CHR_DEV_SCH is not set
-# CONFIG_SCSI_MULTI_LUN is not set
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
-# CONFIG_SCSI_SCAN_ASYNC is not set
-CONFIG_SCSI_WAIT_SCAN=m
-
-#
-# SCSI Transports
-#
-# CONFIG_SCSI_SPI_ATTRS is not set
-# CONFIG_SCSI_FC_ATTRS is not set
-# CONFIG_SCSI_ISCSI_ATTRS is not set
-# CONFIG_SCSI_SAS_ATTRS is not set
-# CONFIG_SCSI_SAS_LIBSAS is not set
-# CONFIG_SCSI_SRP_ATTRS is not set
-CONFIG_SCSI_LOWLEVEL=y
-# CONFIG_ISCSI_TCP is not set
-# CONFIG_ISCSI_BOOT_SYSFS is not set
-# CONFIG_SCSI_CXGB3_ISCSI is not set
-# CONFIG_SCSI_CXGB4_ISCSI is not set
-# CONFIG_SCSI_BNX2_ISCSI is not set
-# CONFIG_SCSI_BNX2X_FCOE is not set
-# CONFIG_BE2ISCSI is not set
-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
-# CONFIG_SCSI_HPSA is not set
-# CONFIG_SCSI_3W_9XXX is not set
-# CONFIG_SCSI_3W_SAS is not set
-# CONFIG_SCSI_ACARD is not set
-# CONFIG_SCSI_AACRAID is not set
-# CONFIG_SCSI_AIC7XXX is not set
-# CONFIG_SCSI_AIC7XXX_OLD is not set
-# CONFIG_SCSI_AIC79XX is not set
-# CONFIG_SCSI_AIC94XX is not set
-# CONFIG_SCSI_MVSAS is not set
-# CONFIG_SCSI_DPT_I2O is not set
-# CONFIG_SCSI_ADVANSYS is not set
-# CONFIG_SCSI_ARCMSR is not set
-# CONFIG_MEGARAID_NEWGEN is not set
-# CONFIG_MEGARAID_LEGACY is not set
-# CONFIG_MEGARAID_SAS is not set
-# CONFIG_SCSI_MPT2SAS is not set
-# CONFIG_SCSI_HPTIOP is not set
-# CONFIG_LIBFC is not set
-# CONFIG_LIBFCOE is not set
-# CONFIG_FCOE is not set
-# CONFIG_SCSI_DMX3191D is not set
-# CONFIG_SCSI_FUTURE_DOMAIN is not set
-# CONFIG_SCSI_IPS is not set
-# CONFIG_SCSI_INITIO is not set
-# CONFIG_SCSI_INIA100 is not set
-# CONFIG_SCSI_STEX is not set
-# CONFIG_SCSI_SYM53C8XX_2 is not set
-# CONFIG_SCSI_QLOGIC_1280 is not set
-# CONFIG_SCSI_QLA_FC is not set
-# CONFIG_SCSI_QLA_ISCSI is not set
-# CONFIG_SCSI_LPFC is not set
-# CONFIG_SCSI_DC395x is not set
-# CONFIG_SCSI_DC390T is not set
-# CONFIG_SCSI_NSP32 is not set
-# CONFIG_SCSI_DEBUG is not set
-# CONFIG_SCSI_PMCRAID is not set
-# CONFIG_SCSI_PM8001 is not set
-# CONFIG_SCSI_SRP is not set
-# CONFIG_SCSI_BFA_FC is not set
-# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
-# CONFIG_SCSI_DH is not set
-# CONFIG_SCSI_OSD_INITIATOR is not set
-# CONFIG_ATA is not set
-# CONFIG_MD is not set
-# CONFIG_TARGET_CORE is not set
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_FIREWIRE is not set
-# CONFIG_FIREWIRE_NOSY is not set
-# CONFIG_I2O is not set
+CONFIG_ATA=y
+CONFIG_SATA_SIL24=y
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MULTICORE_RAID456=y
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_DEBUG=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_FUSION=y
+CONFIG_FUSION_SAS=y
 CONFIG_NETDEVICES=y
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_MACVLAN is not set
-# CONFIG_EQUALIZER is not set
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETPOLL_TRAP=y
 CONFIG_TUN=y
-# CONFIG_VETH is not set
-# CONFIG_ARCNET is not set
-# CONFIG_MII is not set
-# CONFIG_PHYLIB is not set
-# CONFIG_NET_ETHERNET is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_ACENIC is not set
-# CONFIG_DL2K is not set
-# CONFIG_E1000 is not set
-# CONFIG_E1000E is not set
-# CONFIG_IP1000 is not set
-# CONFIG_IGB is not set
-# CONFIG_IGBVF is not set
-# CONFIG_NS83820 is not set
-# CONFIG_HAMACHI is not set
-# CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
-# CONFIG_SIS190 is not set
-# CONFIG_SKGE is not set
-# CONFIG_SKY2 is not set
-# CONFIG_VIA_VELOCITY is not set
-# CONFIG_TIGON3 is not set
-# CONFIG_BNX2 is not set
-# CONFIG_CNIC is not set
-# CONFIG_QLA3XXX is not set
-# CONFIG_ATL1 is not set
-# CONFIG_ATL1E is not set
-# CONFIG_ATL1C is not set
-# CONFIG_JME is not set
-# CONFIG_STMMAC_ETH is not set
-# CONFIG_PCH_GBE is not set
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_TR is not set
+CONFIG_VETH=m
+CONFIG_NET_DSA_MV88E6060=y
+CONFIG_NET_DSA_MV88E6131=y
+CONFIG_NET_DSA_MV88E6123_61_65=y
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_ALTEON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_EXAR is not set
+# CONFIG_NET_VENDOR_HP is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MELLANOX is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_PACKET_ENGINE is not set
+# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_WLAN is not set
-
-#
-# Enable WiMAX (Networking options) to see the WiMAX drivers
-#
-# CONFIG_WAN is not set
-
-#
-# CAIF transport drivers
-#
-CONFIG_TILE_NET=y
-# CONFIG_FDDI is not set
-# CONFIG_HIPPI is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NET_FC is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-# CONFIG_VMXNET3 is not set
-# CONFIG_ISDN is not set
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-# CONFIG_INPUT_FF_MEMLESS is not set
-# CONFIG_INPUT_POLLDEV is not set
-# CONFIG_INPUT_SPARSEKMAP is not set
-
-#
-# Userland interfaces
-#
 # CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TABLET is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
 # CONFIG_SERIO is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
 # CONFIG_VT is not set
-CONFIG_UNIX98_PTYS=y
-# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-# CONFIG_NOZOMI is not set
-# CONFIG_N_GSM is not set
-CONFIG_DEVKMEM=y
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-# CONFIG_SERIAL_MFD_HSU is not set
-# CONFIG_SERIAL_JSM is not set
-# CONFIG_SERIAL_TIMBERDALE is not set
-# CONFIG_SERIAL_ALTERA_JTAGUART is not set
-# CONFIG_SERIAL_ALTERA_UART is not set
-# CONFIG_SERIAL_PCH_UART is not set
-# CONFIG_TTY_PRINTK is not set
-CONFIG_HVC_DRIVER=y
-# CONFIG_IPMI_HANDLER is not set
-# CONFIG_HW_RANDOM is not set
-# CONFIG_R3964 is not set
-# CONFIG_APPLICOM is not set
-
-#
-# PCMCIA character devices
-#
-# CONFIG_RAW_DRIVER is not set
-# CONFIG_TCG_TPM is not set
-CONFIG_DEVPORT=y
-# CONFIG_RAMOOPS is not set
-# CONFIG_I2C is not set
-# CONFIG_SPI is not set
-
-#
-# PPS support
-#
-# CONFIG_PPS is not set
-
-#
-# PPS generators support
-#
-# CONFIG_W1 is not set
-# CONFIG_POWER_SUPPLY is not set
-CONFIG_HWMON=y
-# CONFIG_HWMON_VID is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
-
-#
-# Native drivers
-#
-# CONFIG_SENSORS_I5K_AMB is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_SENSORS_F71882FG is not set
-# CONFIG_SENSORS_IT87 is not set
-# CONFIG_SENSORS_PC87360 is not set
-# CONFIG_SENSORS_PC87427 is not set
-# CONFIG_SENSORS_SIS5595 is not set
-# CONFIG_SENSORS_SMSC47M1 is not set
-# CONFIG_SENSORS_SMSC47B397 is not set
-# CONFIG_SENSORS_SCH5627 is not set
-# CONFIG_SENSORS_VIA686A is not set
-# CONFIG_SENSORS_VT1211 is not set
-# CONFIG_SENSORS_VT8231 is not set
-# CONFIG_SENSORS_W83627HF is not set
-# CONFIG_SENSORS_W83627EHF is not set
-# CONFIG_THERMAL is not set
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_TIMERIOMEM=m
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+# CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
 CONFIG_WATCHDOG_NOWAYOUT=y
-
-#
-# Watchdog Device Drivers
-#
-# CONFIG_SOFT_WATCHDOG is not set
-# CONFIG_ALIM7101_WDT is not set
-
-#
-# PCI-based Watchdog Cards
-#
-# CONFIG_PCIPCWATCHDOG is not set
-# CONFIG_WDTPCI is not set
-CONFIG_SSB_POSSIBLE=y
-
-#
-# Sonics Silicon Backplane
-#
-# CONFIG_SSB is not set
-CONFIG_MFD_SUPPORT=y
-# CONFIG_MFD_CORE is not set
-# CONFIG_MFD_SM501 is not set
-# CONFIG_HTC_PASIC3 is not set
-# CONFIG_MFD_TMIO is not set
-# CONFIG_ABX500_CORE is not set
-# CONFIG_LPC_SCH is not set
-# CONFIG_MFD_RDC321X is not set
-# CONFIG_MFD_JANZ_CMODIO is not set
-# CONFIG_MFD_VX855 is not set
-# CONFIG_REGULATOR is not set
-# CONFIG_MEDIA_SUPPORT is not set
-
-#
-# Graphics support
-#
-CONFIG_VGA_ARB=y
-CONFIG_VGA_ARB_MAX_GPUS=16
-# CONFIG_DRM is not set
-# CONFIG_STUB_POULSBO is not set
-# CONFIG_VGASTATE is not set
-# CONFIG_VIDEO_OUTPUT_CONTROL is not set
-# CONFIG_FB is not set
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
-
-#
-# Display device support
-#
-# CONFIG_DISPLAY_SUPPORT is not set
-# CONFIG_SOUND is not set
+# CONFIG_VGA_ARB is not set
 # CONFIG_HID_SUPPORT is not set
-CONFIG_USB_SUPPORT=y
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
-CONFIG_USB_ARCH_HAS_EHCI=y
-# CONFIG_USB is not set
-# CONFIG_USB_OTG_WHITELIST is not set
-# CONFIG_USB_OTG_BLACKLIST_HUB is not set
-
-#
-# Enable Host or Gadget support to see Inventra options
-#
-
-#
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# OTG and related infrastructure
-#
-# CONFIG_UWB is not set
-# CONFIG_MMC is not set
-# CONFIG_MEMSTICK is not set
-# CONFIG_NEW_LEDS is not set
-# CONFIG_NFC_DEVICES is not set
-# CONFIG_ACCESSIBILITY is not set
-# CONFIG_INFINIBAND is not set
+# CONFIG_USB_SUPPORT is not set
 CONFIG_EDAC=y
-
-#
-# Reporting subsystems
-#
-# CONFIG_EDAC_DEBUG is not set
 CONFIG_EDAC_MM_EDAC=y
-CONFIG_EDAC_TILE=y
-CONFIG_RTC_LIB=y
 CONFIG_RTC_CLASS=y
-CONFIG_RTC_HCTOSYS=y
-CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
-# CONFIG_RTC_DEBUG is not set
-
-#
-# RTC interfaces
-#
-# CONFIG_RTC_INTF_SYSFS is not set
-# CONFIG_RTC_INTF_PROC is not set
-CONFIG_RTC_INTF_DEV=y
-# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
-# CONFIG_RTC_DRV_TEST is not set
-
-#
-# SPI RTC drivers
-#
-
-#
-# Platform RTC drivers
-#
-# CONFIG_RTC_DRV_DS1286 is not set
-# CONFIG_RTC_DRV_DS1511 is not set
-# CONFIG_RTC_DRV_DS1553 is not set
-# CONFIG_RTC_DRV_DS1742 is not set
-# CONFIG_RTC_DRV_STK17TA8 is not set
-# CONFIG_RTC_DRV_M48T86 is not set
-# CONFIG_RTC_DRV_M48T35 is not set
-# CONFIG_RTC_DRV_M48T59 is not set
-# CONFIG_RTC_DRV_MSM6242 is not set
-# CONFIG_RTC_DRV_BQ4802 is not set
-# CONFIG_RTC_DRV_RP5C01 is not set
-# CONFIG_RTC_DRV_V3020 is not set
-
-#
-# on-CPU RTC drivers
-#
 CONFIG_RTC_DRV_TILE=y
-# CONFIG_DMADEVICES is not set
-# CONFIG_AUXDISPLAY is not set
-# CONFIG_UIO is not set
-# CONFIG_STAGING is not set
-
-#
-# File systems
-#
 CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
-# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
 CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_XATTR=y
-# CONFIG_EXT3_FS_POSIX_ACL is not set
-# CONFIG_EXT3_FS_SECURITY is not set
-# CONFIG_EXT4_FS is not set
-CONFIG_JBD=y
-CONFIG_FS_MBCACHE=y
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_XFS_FS is not set
-# CONFIG_GFS2_FS is not set
-# CONFIG_BTRFS_FS is not set
-# CONFIG_NILFS2_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
-CONFIG_EXPORTFS=y
-CONFIG_FILE_LOCKING=y
-CONFIG_FSNOTIFY=y
-CONFIG_DNOTIFY=y
-CONFIG_INOTIFY_USER=y
-# CONFIG_FANOTIFY is not set
-# CONFIG_QUOTA is not set
-# CONFIG_QUOTACTL is not set
-# CONFIG_AUTOFS4_FS is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_QFMT_V2=y
+CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
-# CONFIG_CUSE is not set
-
-#
-# Caches
-#
-# CONFIG_FSCACHE is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-CONFIG_FAT_FS=y
-CONFIG_MSDOS_FS=y
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_FSCACHE_STATS=y
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
-CONFIG_FAT_DEFAULT_CODEPAGE=437
-CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-# CONFIG_PROC_KCORE is not set
-CONFIG_PROC_SYSCTL=y
-CONFIG_PROC_PAGE_MONITOR=y
-CONFIG_SYSFS=y
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
-# CONFIG_TMPFS_POSIX_ACL is not set
+CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
-CONFIG_HUGETLB_PAGE=y
-# CONFIG_CONFIGFS_FS is not set
-CONFIG_MISC_FILESYSTEMS=y
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_LOGFS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_SQUASHFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_OMFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_ROMFS_FS is not set
-# CONFIG_PSTORE is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
-# CONFIG_NFS_V3_ACL is not set
-# CONFIG_NFS_V4 is not set
-# CONFIG_NFSD is not set
-CONFIG_LOCKD=m
-CONFIG_LOCKD_V4=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=m
-# CONFIG_RPCSEC_GSS_KRB5 is not set
-# CONFIG_CEPH_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_FSCACHE=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_CIFS_FSCACHE=y
 CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_DEFAULT="utf8"
 CONFIG_NLS_CODEPAGE_437=y
-# CONFIG_NLS_CODEPAGE_737 is not set
-# CONFIG_NLS_CODEPAGE_775 is not set
-# CONFIG_NLS_CODEPAGE_850 is not set
-# CONFIG_NLS_CODEPAGE_852 is not set
-# CONFIG_NLS_CODEPAGE_855 is not set
-# CONFIG_NLS_CODEPAGE_857 is not set
-# CONFIG_NLS_CODEPAGE_860 is not set
-# CONFIG_NLS_CODEPAGE_861 is not set
-# CONFIG_NLS_CODEPAGE_862 is not set
-# CONFIG_NLS_CODEPAGE_863 is not set
-# CONFIG_NLS_CODEPAGE_864 is not set
-# CONFIG_NLS_CODEPAGE_865 is not set
-# CONFIG_NLS_CODEPAGE_866 is not set
-# CONFIG_NLS_CODEPAGE_869 is not set
-# CONFIG_NLS_CODEPAGE_936 is not set
-# CONFIG_NLS_CODEPAGE_950 is not set
-# CONFIG_NLS_CODEPAGE_932 is not set
-# CONFIG_NLS_CODEPAGE_949 is not set
-# CONFIG_NLS_CODEPAGE_874 is not set
-# CONFIG_NLS_ISO8859_8 is not set
-# CONFIG_NLS_CODEPAGE_1250 is not set
-# CONFIG_NLS_CODEPAGE_1251 is not set
-# CONFIG_NLS_ASCII is not set
-CONFIG_NLS_ISO8859_1=y
-# CONFIG_NLS_ISO8859_2 is not set
-# CONFIG_NLS_ISO8859_3 is not set
-# CONFIG_NLS_ISO8859_4 is not set
-# CONFIG_NLS_ISO8859_5 is not set
-# CONFIG_NLS_ISO8859_6 is not set
-# CONFIG_NLS_ISO8859_7 is not set
-# CONFIG_NLS_ISO8859_9 is not set
-# CONFIG_NLS_ISO8859_13 is not set
-# CONFIG_NLS_ISO8859_14 is not set
-# CONFIG_NLS_ISO8859_15 is not set
-# CONFIG_NLS_KOI8_R is not set
-# CONFIG_NLS_KOI8_U is not set
-# CONFIG_NLS_UTF8 is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_DEFAULT_MESSAGE_LOGLEVEL=4
-CONFIG_ENABLE_WARN_DEPRECATED=y
-CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM_DEBUG=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
 CONFIG_FRAME_WARN=2048
 CONFIG_MAGIC_SYSRQ=y
-# CONFIG_STRIP_ASM_SYMS is not set
-# CONFIG_UNUSED_SYMBOLS is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_HEADERS_CHECK is not set
-# CONFIG_DEBUG_SECTION_MISMATCH is not set
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
-# CONFIG_LOCKUP_DETECTOR is not set
-# CONFIG_HARDLOCKUP_DETECTOR is not set
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
-CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
-CONFIG_SCHED_DEBUG=y
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_TIMER_STATS is not set
-# CONFIG_DEBUG_OBJECTS is not set
-# CONFIG_SLUB_DEBUG_ON is not set
-# CONFIG_SLUB_STATS is not set
-# CONFIG_DEBUG_KMEMLEAK is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_MUTEXES is not set
-# CONFIG_DEBUG_LOCK_ALLOC is not set
-# CONFIG_PROVE_LOCKING is not set
-# CONFIG_SPARSE_RCU_POINTER is not set
-# CONFIG_LOCK_STAT is not set
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
-# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
-CONFIG_STACKTRACE=y
-# CONFIG_DEBUG_KOBJECT is not set
-# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_FS=y
+CONFIG_HEADERS_CHECK=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_INFO=y
-# CONFIG_DEBUG_INFO_REDUCED is not set
+CONFIG_DEBUG_INFO_REDUCED=y
 CONFIG_DEBUG_VM=y
-# CONFIG_DEBUG_WRITECOUNT is not set
-# CONFIG_DEBUG_MEMORY_INIT is not set
-# CONFIG_DEBUG_LIST is not set
-# CONFIG_TEST_LIST_SORT is not set
-# CONFIG_DEBUG_SG is not set
-# CONFIG_DEBUG_NOTIFIERS is not set
-# CONFIG_DEBUG_CREDENTIALS is not set
-# CONFIG_RCU_TORTURE_TEST is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-# CONFIG_BACKTRACE_SELF_TEST is not set
-# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
-# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
-# CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
-# CONFIG_DEBUG_PAGEALLOC is not set
-CONFIG_TRACING_SUPPORT=y
-CONFIG_FTRACE=y
-# CONFIG_IRQSOFF_TRACER is not set
-# CONFIG_SCHED_TRACER is not set
-# CONFIG_ENABLE_DEFAULT_TRACERS is not set
-CONFIG_BRANCH_PROFILE_NONE=y
-# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set
-# CONFIG_PROFILE_ALL_BRANCHES is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
-# CONFIG_ATOMIC64_SELFTEST is not set
-# CONFIG_SAMPLES is not set
-# CONFIG_TEST_KSTRTOX is not set
-CONFIG_EARLY_PRINTK=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_CREDENTIALS=y
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_DEBUG_STACKOVERFLOW=y
-# CONFIG_DEBUG_STACK_USAGE is not set
-CONFIG_DEBUG_EXTRA_FLAGS="-femit-struct-debug-baseonly"
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
-# CONFIG_SECURITY is not set
-# CONFIG_SECURITYFS is not set
-CONFIG_DEFAULT_SECURITY_DAC=y
-CONFIG_DEFAULT_SECURITY=""
-CONFIG_CRYPTO=y
-
-#
-# Crypto core or helper
-#
-# CONFIG_CRYPTO_FIPS is not set
-CONFIG_CRYPTO_ALGAPI=m
-CONFIG_CRYPTO_ALGAPI2=m
-CONFIG_CRYPTO_RNG=m
-CONFIG_CRYPTO_RNG2=m
-# CONFIG_CRYPTO_MANAGER is not set
-# CONFIG_CRYPTO_MANAGER2 is not set
-# CONFIG_CRYPTO_GF128MUL is not set
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_PCRYPT is not set
-# CONFIG_CRYPTO_CRYPTD is not set
-# CONFIG_CRYPTO_AUTHENC is not set
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Authenticated Encryption with Associated Data
-#
-# CONFIG_CRYPTO_CCM is not set
-# CONFIG_CRYPTO_GCM is not set
-# CONFIG_CRYPTO_SEQIV is not set
-
-#
-# Block modes
-#
-# CONFIG_CRYPTO_CBC is not set
-# CONFIG_CRYPTO_CTR is not set
-# CONFIG_CRYPTO_CTS is not set
-# CONFIG_CRYPTO_ECB is not set
-# CONFIG_CRYPTO_LRW is not set
-# CONFIG_CRYPTO_PCBC is not set
-# CONFIG_CRYPTO_XTS is not set
-
-#
-# Hash modes
-#
-# CONFIG_CRYPTO_HMAC is not set
-# CONFIG_CRYPTO_XCBC is not set
-# CONFIG_CRYPTO_VMAC is not set
-
-#
-# Digest
-#
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_GHASH is not set
-# CONFIG_CRYPTO_MD4 is not set
-# CONFIG_CRYPTO_MD5 is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_RMD128 is not set
-# CONFIG_CRYPTO_RMD160 is not set
-# CONFIG_CRYPTO_RMD256 is not set
-# CONFIG_CRYPTO_RMD320 is not set
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-# CONFIG_CRYPTO_WP512 is not set
-
-#
-# Ciphers
-#
-CONFIG_CRYPTO_AES=m
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_CAMELLIA is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_DES is not set
-# CONFIG_CRYPTO_FCRYPT is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_SALSA20 is not set
-# CONFIG_CRYPTO_SEED is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-
-#
-# Compression
-#
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_ZLIB is not set
-# CONFIG_CRYPTO_LZO is not set
-
-#
-# Random Number Generation
-#
-CONFIG_CRYPTO_ANSI_CPRNG=m
-# CONFIG_CRYPTO_USER_API_HASH is not set
-# CONFIG_CRYPTO_USER_API_SKCIPHER is not set
-CONFIG_CRYPTO_HW=y
-# CONFIG_CRYPTO_DEV_HIFN_795X is not set
-# CONFIG_BINARY_PRINTF is not set
-
-#
-# Library routines
-#
-CONFIG_BITREVERSE=y
-CONFIG_GENERIC_FIND_FIRST_BIT=y
-CONFIG_GENERIC_FIND_NEXT_BIT=y
-CONFIG_GENERIC_FIND_LAST_BIT=y
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC16 is not set
-# CONFIG_CRC_T10DIF is not set
-# CONFIG_CRC_ITU_T is not set
-CONFIG_CRC32=y
-# CONFIG_CRC7 is not set
-# CONFIG_LIBCRC32C is not set
-CONFIG_ZLIB_INFLATE=y
-# CONFIG_XZ_DEC is not set
-# CONFIG_XZ_DEC_BCJ is not set
-CONFIG_DECOMPRESS_GZIP=y
-CONFIG_HAS_IOMEM=y
-CONFIG_HAS_IOPORT=y
-CONFIG_HAS_DMA=y
-CONFIG_CPU_RMAP=y
-CONFIG_NLATTR=y
-# CONFIG_AVERAGE is not set
-CONFIG_HAVE_KVM=y
-# CONFIG_VIRTUALIZATION is not set
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITYFS=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_NETWORK_XFRM=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_PCRYPT=m
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32C=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRC_CCITT=m
+CONFIG_CRC7=m
index a7869ad..77763cc 100644 (file)
@@ -303,10 +303,7 @@ long compat_sys_rt_sigreturn(struct pt_regs *regs)
                goto badframe;
 
        sigdelsetmask(&set, ~_BLOCKABLE);
-       spin_lock_irq(&current->sighand->siglock);
-       current->blocked = set;
-       recalc_sigpending();
-       spin_unlock_irq(&current->sighand->siglock);
+       set_current_blocked(&set);
 
        if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
                goto badframe;
index 4c1ac6e..6ae495e 100644 (file)
@@ -108,9 +108,7 @@ void cpu_idle(void)
                }
                rcu_idle_exit();
                tick_nohz_idle_exit();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index bedaf4e..f79d4b8 100644 (file)
@@ -97,10 +97,7 @@ SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs)
                goto badframe;
 
        sigdelsetmask(&set, ~_BLOCKABLE);
-       spin_lock_irq(&current->sighand->siglock);
-       current->blocked = set;
-       recalc_sigpending();
-       spin_unlock_irq(&current->sighand->siglock);
+       set_current_blocked(&set);
 
        if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
                goto badframe;
@@ -286,13 +283,7 @@ static int handle_signal(unsigned long sig, siginfo_t *info,
                 * the work_pending path in the return-to-user code, and
                 * either way we can re-enable interrupts unconditionally.
                 */
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,
-                         &current->blocked, &ka->sa.sa_mask);
-               if (!(ka->sa.sa_flags & SA_NODEFER))
-                       sigaddset(&current->blocked, sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
+               block_sigmask(ka, sig);
        }
 
        return ret;
index f862b00..71ae728 100644 (file)
@@ -163,7 +163,7 @@ static int __init create_sysfs_entries(void)
 
 #define create_hv_attr(name)                                           \
        if (!err)                                                       \
-               err = sysfs_create_file(hypervisor_kobj, &dev_attr_##name);
+               err = sysfs_create_file(hypervisor_kobj, &dev_attr_##name.attr);
        create_hv_attr(type);
        create_hv_attr(version);
        create_hv_attr(config_version);
index cb0999f..b16ac49 100644 (file)
@@ -144,7 +144,7 @@ void arch_read_unlock(arch_rwlock_t *rwlock)
        for (;;) {
                __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
                val = __insn_tns((int *)&rwlock->lock);
-               if (likely(val & 1) == 0) {
+               if (likely((val & 1) == 0)) {
                        rwlock->lock = val - (1 << _RD_COUNT_SHIFT);
                        __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
                        break;
index 6339aa4..d523a6c 100644 (file)
@@ -82,6 +82,7 @@ config X86
        select CLKEVT_I8253
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
        select GENERIC_IOMAP
+       select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
 
 config INSTRUCTION_DECODER
        def_bool (KPROBES || PERF_EVENTS)
index fd84387..39e4909 100644 (file)
@@ -315,6 +315,13 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
        current->mm->free_area_cache = TASK_UNMAPPED_BASE;
        current->mm->cached_hole_size = 0;
 
+       retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
+       if (retval < 0) {
+               /* Someone check-me: is this error path enough? */
+               send_sig(SIGKILL, current, 0);
+               return retval;
+       }
+
        install_exec_creds(bprm);
        current->flags &= ~PF_FORKNOEXEC;
 
@@ -410,13 +417,6 @@ beyond_if:
 
        set_brk(current->mm->start_brk, current->mm->brk);
 
-       retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
-       if (retval < 0) {
-               /* Someone check-me: is this error path enough? */
-               send_sig(SIGKILL, current, 0);
-               return retval;
-       }
-
        current->mm->start_stack =
                (unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
        /* start thread */
index da0b3ca..382f75d 100644 (file)
@@ -7,7 +7,6 @@
 typedef struct {
        unsigned int __softirq_pending;
        unsigned int __nmi_count;       /* arch dependent */
-       unsigned int irq0_irqs;
 #ifdef CONFIG_X86_LOCAL_APIC
        unsigned int apic_timer_irqs;   /* arch dependent */
        unsigned int irq_spurious_count;
index 205b063..74a2e31 100644 (file)
 
 /* Attribute search APIs */
 extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
 extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
-                                            insn_byte_t last_pfx,
+                                            int lpfx_id,
                                             insn_attr_t esc_attr);
 extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
-                                           insn_byte_t last_pfx,
+                                           int lpfx_id,
                                            insn_attr_t esc_attr);
 extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
                                          insn_byte_t vex_m,
index 74df3f1..48eb30a 100644 (file)
@@ -96,12 +96,6 @@ struct insn {
 #define X86_VEX_P(vex) ((vex) & 0x03)          /* VEX3 Byte2, VEX2 Byte1 */
 #define X86_VEX_M_MAX  0x1f                    /* VEX3.M Maximum value */
 
-/* The last prefix is needed for two-byte and three-byte opcodes */
-static inline insn_byte_t insn_last_prefix(struct insn *insn)
-{
-       return insn->prefixes.bytes[3];
-}
-
 extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
 extern void insn_get_prefixes(struct insn *insn);
 extern void insn_get_opcode(struct insn *insn);
@@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
                return X86_VEX_P(insn->vex_prefix.bytes[2]);
 }
 
+/*
+ * Return the id of the last prefix of @insn.
+ *
+ * For AVX instructions this is the VEX.p field (VEX_p is a SIMD prefix id).
+ * Otherwise it is the id looked up for the last legacy prefix byte, or 0
+ * when that byte is zero.
+ */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+       insn_byte_t last_pfx;
+
+       if (insn_is_avx(insn))
+               return insn_vex_p_bits(insn);
+
+       last_pfx = insn->prefixes.bytes[3];
+       if (!last_pfx)
+               return 0;
+
+       return inat_get_last_prefix_id(last_pfx);
+}
+
 /* Offset of each field from kaddr */
 static inline int insn_offset_rex_prefix(struct insn *insn)
 {
index a32b18c..3a16c14 100644 (file)
@@ -9,12 +9,12 @@
 
 #define JUMP_LABEL_NOP_SIZE 5
 
-#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
        asm goto("1:"
-               JUMP_LABEL_INITIAL_NOP
+               STATIC_KEY_INITIAL_NOP
                ".pushsection __jump_table,  \"aw\" \n\t"
                _ASM_ALIGN "\n\t"
                _ASM_PTR "1b, %l[l_yes], %c0 \n\t"
index a6962d9..ccb8059 100644 (file)
 #define MSR_OFFCORE_RSP_0              0x000001a6
 #define MSR_OFFCORE_RSP_1              0x000001a7
 
+#define MSR_LBR_SELECT                 0x000001c8
+#define MSR_LBR_TOS                    0x000001c9
+#define MSR_LBR_NHM_FROM               0x00000680
+#define MSR_LBR_NHM_TO                 0x000006c0
+#define MSR_LBR_CORE_FROM              0x00000040
+#define MSR_LBR_CORE_TO                        0x00000060
+
 #define MSR_IA32_PEBS_ENABLE           0x000003f1
 #define MSR_IA32_DS_AREA               0x00000600
 #define MSR_IA32_PERF_CAPABILITIES     0x00000345
index a7d2db9..c0180fd 100644 (file)
@@ -230,9 +230,9 @@ static inline unsigned long long paravirt_sched_clock(void)
        return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
 }
 
-struct jump_label_key;
-extern struct jump_label_key paravirt_steal_enabled;
-extern struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
index 461ce43..e8fb2c7 100644 (file)
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void);
 #ifdef CONFIG_PERF_EVENTS
 extern void perf_events_lapic_init(void);
 
-#define PERF_EVENT_INDEX_OFFSET                        0
-
 /*
  * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
  * This flag is otherwise unused and ABI specified to be 0, so nobody should
index 431793e..34baa0e 100644 (file)
@@ -57,14 +57,10 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
 
 static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
-       unsigned long long quot;
-       unsigned long long rem;
        int cpu = smp_processor_id();
        unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
-       quot = (cyc >> CYC2NS_SCALE_FACTOR);
-       rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);
-       ns += quot * per_cpu(cyc2ns, cpu) +
-               ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR);
+       ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
+                       (1UL << CYC2NS_SCALE_FACTOR));
        return ns;
 }
 
index 5369059..532d2e0 100644 (file)
@@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC)           += machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)            += relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)       += crash_dump_$(BITS).o
 obj-$(CONFIG_KPROBES)          += kprobes.o
+obj-$(CONFIG_OPTPROBES)                += kprobes-opt.o
 obj-$(CONFIG_MODULES)          += module.o
 obj-$(CONFIG_DOUBLEFAULT)      += doublefault_32.o
 obj-$(CONFIG_KGDB)             += kgdb.o
index f4773f4..0a44b90 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 
 #include <linux/io.h>
+#include <linux/sched.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
        if (c->x86_power & (1 << 8)) {
                set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
                set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+               if (!check_tsc_unstable())
+                       sched_clock_stable = 1;
        }
 
 #ifdef CONFIG_X86_64
index 5adce10..0a18d16 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/bitops.h>
+#include <linux/device.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -31,6 +32,7 @@
 #include <asm/compat.h>
 #include <asm/smp.h>
 #include <asm/alternative.h>
+#include <asm/timer.h>
 
 #include "perf_event.h"
 
@@ -351,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event)
        return 0;
 }
 
+/*
+ * precise_ip > 1 implies using the LBR to capture ALL taken branches at
+ * the priv levels of the measurement. Check that the user supplied
+ * branch_sample_type is compatible with those settings.
+ *
+ * Returns 1 when compatible, 0 otherwise.
+ */
+static inline int precise_br_compat(struct perf_event *event)
+{
+       u64 requested = event->attr.branch_sample_type;
+       u64 expected = 0;
+
+       /* must capture all branches */
+       if ((requested & PERF_SAMPLE_BRANCH_ANY) == 0)
+               return 0;
+
+       /* priv levels the event itself measures */
+       if (!event->attr.exclude_user)
+               expected |= PERF_SAMPLE_BRANCH_USER;
+       if (!event->attr.exclude_kernel)
+               expected |= PERF_SAMPLE_BRANCH_KERNEL;
+
+       /*
+        * only the user/kernel bits are compared;
+        * PERF_SAMPLE_BRANCH_HV is ignored, not supported on x86
+        */
+       requested &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
+
+       return requested == expected;
+}
+
 int x86_pmu_hw_config(struct perf_event *event)
 {
        if (event->attr.precise_ip) {
@@ -367,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event)
 
                if (event->attr.precise_ip > precise)
                        return -EOPNOTSUPP;
+               /*
+                * check that PEBS LBR correction does not conflict with
+                * whatever the user is asking with attr->branch_sample_type
+                */
+               if (event->attr.precise_ip > 1) {
+                       u64 *br_type = &event->attr.branch_sample_type;
+
+                       if (has_branch_stack(event)) {
+                               if (!precise_br_compat(event))
+                                       return -EOPNOTSUPP;
+
+                               /* branch_sample_type is compatible */
+
+                       } else {
+                               /*
+                                * user did not specify  branch_sample_type
+                                *
+                                * For PEBS fixups, we capture all
+                                * the branches at the priv level of the
+                                * event.
+                                */
+                               *br_type = PERF_SAMPLE_BRANCH_ANY;
+
+                               if (!event->attr.exclude_user)
+                                       *br_type |= PERF_SAMPLE_BRANCH_USER;
+
+                               if (!event->attr.exclude_kernel)
+                                       *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
+                       }
+               }
        }
 
        /*
@@ -424,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
        /* mark unused */
        event->hw.extra_reg.idx = EXTRA_REG_NONE;
 
+       /* mark not used */
+       event->hw.extra_reg.idx = EXTRA_REG_NONE;
+       event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
        return x86_pmu.hw_config(event);
 }
 
@@ -1210,6 +1276,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
                break;
 
        case CPU_STARTING:
+               if (x86_pmu.attr_rdpmc)
+                       set_in_cr4(X86_CR4_PCE);
                if (x86_pmu.cpu_starting)
                        x86_pmu.cpu_starting(cpu);
                break;
@@ -1319,6 +1387,8 @@ static int __init init_hw_perf_events(void)
                }
        }
 
+       x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
        pr_info("... version:                %d\n",     x86_pmu.version);
        pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
        pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
@@ -1542,23 +1612,106 @@ static int x86_pmu_event_init(struct perf_event *event)
        return err;
 }
 
+/*
+ * Return the hardware counter index of @event, plus one.
+ *
+ * When fixed counters exist and the event sits on one, the index is
+ * rebased to X86_PMC_IDX_FIXED and bit 30 is set before adding one.
+ * NOTE(review): presumably this is the value userspace feeds to RDPMC --
+ * confirm against the perf mmap page documentation.
+ */
+static int x86_pmu_event_idx(struct perf_event *event)
+{
+       int idx = event->hw.idx;
+
+       if (!x86_pmu.num_counters_fixed || idx < X86_PMC_IDX_FIXED)
+               return idx + 1;
+
+       return ((idx - X86_PMC_IDX_FIXED) | (1 << 30)) + 1;
+}
+
+/*
+ * sysfs show handler for the "rdpmc" attribute: prints x86_pmu.attr_rdpmc
+ * as a decimal number followed by a newline into @buf and returns the
+ * snprintf() result.
+ */
+static ssize_t get_attr_rdpmc(struct device *cdev,
+                             struct device_attribute *attr,
+                             char *buf)
+{
+       int rdpmc = x86_pmu.attr_rdpmc;
+
+       return snprintf(buf, 40, "%d\n", rdpmc);
+}
+
+/*
+ * Set or clear X86_CR4_PCE according to @info: a non-zero value (cast to
+ * unsigned long) sets the bit, zero clears it.
+ */
+static void change_rdpmc(void *info)
+{
+       if (!(unsigned long)info) {
+               clear_in_cr4(X86_CR4_PCE);
+               return;
+       }
+
+       set_in_cr4(X86_CR4_PCE);
+}
+
+/*
+ * sysfs store handler for the "rdpmc" attribute.
+ *
+ * @buf is parsed as an unsigned integer; any non-zero value means
+ * "enabled", zero means "disabled". When that differs from the current
+ * x86_pmu.attr_rdpmc, the setting is updated and change_rdpmc() is run on
+ * all CPUs, including the one handling this write.
+ *
+ * Always returns @count.
+ */
+static ssize_t set_attr_rdpmc(struct device *cdev,
+                             struct device_attribute *attr,
+                             const char *buf, size_t count)
+{
+       unsigned long val = simple_strtoul(buf, NULL, 0);
+
+       if (!!val != !!x86_pmu.attr_rdpmc) {
+               x86_pmu.attr_rdpmc = !!val;
+               /*
+                * smp_call_function() only targets the other CPUs, which
+                * would leave CR4.PCE unchanged on the CPU doing the write;
+                * on_each_cpu() covers the local CPU as well.
+                */
+               on_each_cpu(change_rdpmc, (void *)val, 1);
+       }
+
+       return count;
+}
+/*
+ * "rdpmc" device attribute: mode S_IRUSR | S_IWUSR, shown by
+ * get_attr_rdpmc() and stored by set_attr_rdpmc().
+ */
+static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
+/* NULL-terminated list of the attributes in x86_pmu_attr_group */
+static struct attribute *x86_pmu_attrs[] = {
+       &dev_attr_rdpmc.attr,
+       NULL,
+};
+/* unnamed group wrapping x86_pmu_attrs */
+static struct attribute_group x86_pmu_attr_group = {
+       .attrs = x86_pmu_attrs,
+};
+/* NULL-terminated group list, referenced by the .attr_groups field of pmu */
+static const struct attribute_group *x86_pmu_attr_groups[] = {
+       &x86_pmu_attr_group,
+       NULL,
+};
+
+/*
+ * Forward a branch stack flush to the x86_pmu specific hook.
+ * Does nothing when x86_pmu provides no flush_branch_stack callback.
+ */
+static void x86_pmu_flush_branch_stack(void)
+{
+       if (!x86_pmu.flush_branch_stack)
+               return;
+
+       x86_pmu.flush_branch_stack();
+}
+
 static struct pmu pmu = {
-       .pmu_enable     = x86_pmu_enable,
-       .pmu_disable    = x86_pmu_disable,
+       .pmu_enable             = x86_pmu_enable,
+       .pmu_disable            = x86_pmu_disable,
+
+       .attr_groups    = x86_pmu_attr_groups,
 
        .event_init     = x86_pmu_event_init,
 
-       .add            = x86_pmu_add,
-       .del            = x86_pmu_del,
-       .start          = x86_pmu_start,
-       .stop           = x86_pmu_stop,
-       .read           = x86_pmu_read,
+       .add                    = x86_pmu_add,
+       .del                    = x86_pmu_del,
+       .start                  = x86_pmu_start,
+       .stop                   = x86_pmu_stop,
+       .read                   = x86_pmu_read,
 
        .start_txn      = x86_pmu_start_txn,
        .cancel_txn     = x86_pmu_cancel_txn,
        .commit_txn     = x86_pmu_commit_txn,
+
+       .event_idx      = x86_pmu_event_idx,
+       .flush_branch_stack     = x86_pmu_flush_branch_stack,
 };
 
+/*
+ * Fill in the time conversion fields of @userpg from this CPU's cyc2ns
+ * data: time_mult, time_shift (CYC2NS_SCALE_FACTOR) and time_offset
+ * (this CPU's cyc2ns_offset minus @now).
+ *
+ * @userpg is left untouched unless the boot CPU has both
+ * X86_FEATURE_CONSTANT_TSC and X86_FEATURE_NONSTOP_TSC.
+ */
+void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ||
+           !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+               return;
+
+       userpg->time_mult = this_cpu_read(cyc2ns);
+       userpg->time_shift = CYC2NS_SCALE_FACTOR;
+       userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+}
+
 /*
  * callchain support
  */
index c30c807..8484e77 100644 (file)
@@ -33,6 +33,7 @@ enum extra_reg_type {
 
        EXTRA_REG_RSP_0 = 0,    /* offcore_response_0 */
        EXTRA_REG_RSP_1 = 1,    /* offcore_response_1 */
+       EXTRA_REG_LBR   = 2,    /* lbr_select */
 
        EXTRA_REG_MAX           /* number of entries needed */
 };
@@ -130,6 +131,8 @@ struct cpu_hw_events {
        void                            *lbr_context;
        struct perf_branch_stack        lbr_stack;
        struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
+       struct er_account               *lbr_sel;
+       u64                             br_sel;
 
        /*
         * Intel host/guest exclude bits
@@ -268,6 +271,29 @@ struct x86_pmu_quirk {
        void (*func)(void);
 };
 
+union x86_pmu_config {
+       struct {        /* bit positions below assume LSB-first bitfield allocation */
+               u64 event:8,            /* bits  0-7  */
+                   umask:8,            /* bits  8-15 */
+                   usr:1,              /* bit  16 */
+                   os:1,               /* bit  17 */
+                   edge:1,             /* bit  18 */
+                   pc:1,               /* bit  19 */
+                   interrupt:1,        /* bit  20 */
+                   __reserved1:1,      /* bit  21 */
+                   en:1,               /* bit  22 */
+                   inv:1,              /* bit  23 */
+                   cmask:8,            /* bits 24-31 */
+                   event2:4,           /* bits 32-35 */
+                   __reserved2:4,      /* bits 36-39 */
+                   go:1,               /* bit  40 */
+                   ho:1;               /* bit  41 */
+       } bits;
+       u64 value;      /* the same storage viewed as one raw 64-bit config word */
+};
+/*
+ * Build a raw config value from designated initializers of the bit fields.
+ * Unnamed fields are zero, e.g.
+ * X86_CONFIG(.event=0xc0, .inv=1, .cmask=16) stands in for 0x108000c0.
+ */
+#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -309,10 +335,19 @@ struct x86_pmu {
        struct x86_pmu_quirk *quirks;
        int             perfctr_second_write;
 
+       /*
+        * sysfs attrs
+        */
+       int             attr_rdpmc;
+
+       /*
+        * CPU Hotplug hooks
+        */
        int             (*cpu_prepare)(int cpu);
        void            (*cpu_starting)(int cpu);
        void            (*cpu_dying)(int cpu);
        void            (*cpu_dead)(int cpu);
+       void            (*flush_branch_stack)(void);
 
        /*
         * Intel Arch Perfmon v2+
@@ -334,6 +369,8 @@ struct x86_pmu {
         */
        unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
        int             lbr_nr;                    /* hardware stack size */
+       u64             lbr_sel_mask;              /* LBR_SELECT valid bits */
+       const int       *lbr_sel_map;              /* lbr_select mappings */
 
        /*
         * Extra registers for events
@@ -447,6 +484,15 @@ extern struct event_constraint emptyconstraint;
 
 extern struct event_constraint unconstrained;
 
+/*
+ * Tell whether @ip is treated as a kernel address: on CONFIG_X86_32 it
+ * must be above PAGE_OFFSET, otherwise it must be negative when viewed
+ * as a signed long.
+ */
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifndef CONFIG_X86_32
+       return (long)ip < 0;
+#else
+       return ip > PAGE_OFFSET;
+#endif
+}
+
 #ifdef CONFIG_CPU_SUP_AMD
 
 int amd_pmu_init(void);
@@ -527,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void);
 
 void intel_pmu_lbr_init_atom(void);
 
+void intel_pmu_lbr_init_snb(void);
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event);
+
 int p4_pmu_init(void);
 
 int p6_pmu_init(void);
index 67250a5..dd002fa 100644 (file)
@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
        if (ret)
                return ret;
 
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
        if (event->attr.exclude_host && event->attr.exclude_guest)
                /*
                 * When HO == GO == 1 the hardware treats that as GO == HO == 0
index 3bd37bd..6a84e7f 100644 (file)
@@ -385,14 +385,15 @@ static __initconst const u64 westmere_hw_cache_event_ids
 #define NHM_LOCAL_DRAM         (1 << 14)
 #define NHM_NON_DRAM           (1 << 15)
 
-#define NHM_ALL_DRAM           (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
+#define NHM_LOCAL              (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_REMOTE             (NHM_REMOTE_DRAM)
 
 #define NHM_DMND_READ          (NHM_DMND_DATA_RD)
 #define NHM_DMND_WRITE         (NHM_DMND_RFO|NHM_DMND_WB)
 #define NHM_DMND_PREFETCH      (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
 
 #define NHM_L3_HIT     (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
-#define NHM_L3_MISS    (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_MISS    (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
 #define NHM_L3_ACCESS  (NHM_L3_HIT|NHM_L3_MISS)
 
 static __initconst const u64 nehalem_hw_cache_extra_regs
@@ -416,16 +417,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
  },
  [ C(NODE) ] = {
        [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
-               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
+               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
+               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE,
        },
        [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
-               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
+               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
+               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE,
        },
        [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
-               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
+               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
+               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE,
        },
  },
 };
@@ -727,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
  },
 };
 
+/*
+ * Does @event need LBR sampling?
+ *
+ * True when the user explicitly requested branch sampling, or implicitly
+ * when the LBR is used to correct PEBS skid (pebs_trap capability set and
+ * precise_ip > 1).
+ */
+static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
+{
+       return has_branch_stack(event) ||
+              (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1);
+}
+
 static void intel_pmu_disable_all(void)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -881,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
        cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
        cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
 
+       /*
+        * must disable before any actual event
+        * because any event may be combined with LBR
+        */
+       if (intel_pmu_needs_lbr_smpl(event))
+               intel_pmu_lbr_disable(event);
+
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
                intel_pmu_disable_fixed(hwc);
                return;
@@ -935,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
                intel_pmu_enable_bts(hwc->config);
                return;
        }
+       /*
+        * must be enabled before any actual event
+        * because any event may be combined with LBR
+        */
+       if (intel_pmu_needs_lbr_smpl(event))
+               intel_pmu_lbr_enable(event);
 
        if (event->attr.exclude_host)
                cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1057,6 +1084,9 @@ again:
 
                data.period = event->hw.last_period;
 
+               if (has_branch_stack(event))
+                       data.br_stack = &cpuc->lbr_stack;
+
                if (perf_event_overflow(event, &data, regs))
                        x86_pmu_stop(event, 0);
        }
@@ -1123,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
  */
 static struct event_constraint *
 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
-                                  struct perf_event *event)
+                                  struct perf_event *event,
+                                  struct hw_perf_event_extra *reg)
 {
        struct event_constraint *c = &emptyconstraint;
-       struct hw_perf_event_extra *reg = &event->hw.extra_reg;
        struct er_account *era;
        unsigned long flags;
        int orig_idx = reg->idx;
 
        /* already allocated shared msr */
        if (reg->alloc)
-               return &unconstrained;
+               return NULL; /* call x86_get_event_constraint() */
 
 again:
        era = &cpuc->shared_regs->regs[reg->idx];
@@ -1156,14 +1186,10 @@ again:
                reg->alloc = 1;
 
                /*
-                * All events using extra_reg are unconstrained.
-                * Avoids calling x86_get_event_constraints()
-                *
-                * Must revisit if extra_reg controlling events
-                * ever have constraints. Worst case we go through
-                * the regular event constraint table.
+                * need to call x86_get_event_constraint()
+                * to check if associated event has constraints
                 */
-               c = &unconstrained;
+               c = NULL;
        } else if (intel_try_alt_er(event, orig_idx)) {
                raw_spin_unlock_irqrestore(&era->lock, flags);
                goto again;
@@ -1200,11 +1226,23 @@ static struct event_constraint *
 intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
                              struct perf_event *event)
 {
-       struct event_constraint *c = NULL;
-
-       if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
-               c = __intel_shared_reg_get_constraints(cpuc, event);
-
+       struct event_constraint *c = NULL, *d;
+       struct hw_perf_event_extra *xreg, *breg;
+
+       xreg = &event->hw.extra_reg;
+       if (xreg->idx != EXTRA_REG_NONE) {
+               c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+               if (c == &emptyconstraint)
+                       return c;
+       }
+       breg = &event->hw.branch_reg;
+       if (breg->idx != EXTRA_REG_NONE) {
+               d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+               if (d == &emptyconstraint) {
+                       __intel_shared_reg_put_constraints(cpuc, xreg);
+                       c = d;
+               }
+       }
        return c;
 }
 
@@ -1252,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
        reg = &event->hw.extra_reg;
        if (reg->idx != EXTRA_REG_NONE)
                __intel_shared_reg_put_constraints(cpuc, reg);
+
+       reg = &event->hw.branch_reg;
+       if (reg->idx != EXTRA_REG_NONE)
+               __intel_shared_reg_put_constraints(cpuc, reg);
 }
 
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1287,12 +1329,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
                 *
                 * Thereby we gain a PEBS capable cycle counter.
                 */
-               u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
+               u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
+
 
                alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
                event->hw.config = alt_config;
        }
 
+       if (intel_pmu_needs_lbr_smpl(event)) {
+               ret = intel_pmu_setup_lbr_filter(event);
+               if (ret)
+                       return ret;
+       }
+
        if (event->attr.type != PERF_TYPE_RAW)
                return 0;
 
@@ -1431,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu)
 {
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-       if (!x86_pmu.extra_regs)
+       if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
                return NOTIFY_OK;
 
        cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1453,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu)
         */
        intel_pmu_lbr_reset();
 
-       if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
+       cpuc->lbr_sel = NULL;
+
+       if (!cpuc->shared_regs)
                return;
 
-       for_each_cpu(i, topology_thread_cpumask(cpu)) {
-               struct intel_shared_regs *pc;
+       if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
+               for_each_cpu(i, topology_thread_cpumask(cpu)) {
+                       struct intel_shared_regs *pc;
 
-               pc = per_cpu(cpu_hw_events, i).shared_regs;
-               if (pc && pc->core_id == core_id) {
-                       cpuc->kfree_on_online = cpuc->shared_regs;
-                       cpuc->shared_regs = pc;
-                       break;
+                       pc = per_cpu(cpu_hw_events, i).shared_regs;
+                       if (pc && pc->core_id == core_id) {
+                               cpuc->kfree_on_online = cpuc->shared_regs;
+                               cpuc->shared_regs = pc;
+                               break;
+                       }
                }
+               cpuc->shared_regs->core_id = core_id;
+               cpuc->shared_regs->refcnt++;
        }
 
-       cpuc->shared_regs->core_id = core_id;
-       cpuc->shared_regs->refcnt++;
+       if (x86_pmu.lbr_sel_map)
+               cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
 }
 
 static void intel_pmu_cpu_dying(int cpu)
@@ -1486,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu)
        fini_debug_store_on_cpu(cpu);
 }
 
+/*
+ * Intel LBR does not tag its entries with the PID of the current task,
+ * so it has to be flushed on context switch. For now it is simply reset.
+ * Nothing is done when x86_pmu.lbr_nr is zero.
+ */
+static void intel_pmu_flush_branch_stack(void)
+{
+       if (!x86_pmu.lbr_nr)
+               return;
+
+       intel_pmu_lbr_reset();
+}
+
 static __initconst const struct x86_pmu intel_pmu = {
        .name                   = "Intel",
        .handle_irq             = intel_pmu_handle_irq,
@@ -1513,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = {
        .cpu_starting           = intel_pmu_cpu_starting,
        .cpu_dying              = intel_pmu_cpu_dying,
        .guest_get_msrs         = intel_guest_get_msrs,
+       .flush_branch_stack     = intel_pmu_flush_branch_stack,
 };
 
 static __init void intel_clovertown_quirk(void)
@@ -1689,9 +1757,11 @@ __init int intel_pmu_init(void)
                x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
                /* UOPS_ISSUED.STALLED_CYCLES */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
                /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+                       X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
                x86_add_quirk(intel_nehalem_quirk);
 
@@ -1726,9 +1796,11 @@ __init int intel_pmu_init(void)
                x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
                /* UOPS_ISSUED.STALLED_CYCLES */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
                /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+                       X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
                pr_cont("Westmere events, ");
                break;
@@ -1739,7 +1811,7 @@ __init int intel_pmu_init(void)
                memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
 
-               intel_pmu_lbr_init_nhm();
+               intel_pmu_lbr_init_snb();
 
                x86_pmu.event_constraints = intel_snb_event_constraints;
                x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
@@ -1749,9 +1821,11 @@ __init int intel_pmu_init(void)
                x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
                /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
                /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+                       X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
 
                pr_cont("SandyBridge events, ");
                break;
index d6bd49f..7f64df1 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/slab.h>
 
 #include <asm/perf_event.h>
+#include <asm/insn.h>
 
 #include "perf_event.h"
 
@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
        cpuc->pebs_enabled |= 1ULL << hwc->idx;
-
-       if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
-               intel_pmu_lbr_enable(event);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
        hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
-       if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
-               intel_pmu_lbr_disable(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
                wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 }
 
-#include <asm/insn.h>
-
-static inline bool kernel_ip(unsigned long ip)
-{
-#ifdef CONFIG_X86_32
-       return ip > PAGE_OFFSET;
-#else
-       return (long)ip < 0;
-#endif
-}
-
 static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
         * both formats and we don't use the other fields in this
         * routine.
         */
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct pebs_record_core *pebs = __pebs;
        struct perf_sample_data data;
        struct pt_regs regs;
@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
        else
                regs.flags &= ~PERF_EFLAGS_EXACT;
 
+       if (has_branch_stack(event))
+               data.br_stack = &cpuc->lbr_stack;
+
        if (perf_event_overflow(event, &data, &regs))
                x86_pmu_stop(event, 0);
 }
index 47a7e63..520b426 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <asm/perf_event.h>
 #include <asm/msr.h>
+#include <asm/insn.h>
 
 #include "perf_event.h"
 
@@ -14,6 +15,100 @@ enum {
 };
 
 /*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ *
+ * Hardware branch filter (not available on all CPUs)
+ */
+#define LBR_KERNEL_BIT         0 /* do not capture at ring0 */
+#define LBR_USER_BIT           1 /* do not capture at ring > 0 */
+#define LBR_JCC_BIT            2 /* do not capture conditional branches */
+#define LBR_REL_CALL_BIT       3 /* do not capture relative calls */
+#define LBR_IND_CALL_BIT       4 /* do not capture indirect calls */
+#define LBR_RETURN_BIT         5 /* do not capture near returns */
+#define LBR_IND_JMP_BIT                6 /* do not capture indirect jumps */
+#define LBR_REL_JMP_BIT                7 /* do not capture relative jumps */
+#define LBR_FAR_BIT            8 /* do not capture far branches */
+
+#define LBR_KERNEL     (1 << LBR_KERNEL_BIT)
+#define LBR_USER       (1 << LBR_USER_BIT)
+#define LBR_JCC                (1 << LBR_JCC_BIT)
+#define LBR_REL_CALL   (1 << LBR_REL_CALL_BIT)
+#define LBR_IND_CALL   (1 << LBR_IND_CALL_BIT)
+#define LBR_RETURN     (1 << LBR_RETURN_BIT)
+#define LBR_REL_JMP    (1 << LBR_REL_JMP_BIT)
+#define LBR_IND_JMP    (1 << LBR_IND_JMP_BIT)
+#define LBR_FAR                (1 << LBR_FAR_BIT)
+
+#define LBR_PLM (LBR_KERNEL | LBR_USER)
+
+#define LBR_SEL_MASK   0x1ff   /* valid bits in LBR_SELECT */
+#define LBR_NOT_SUPP   -1      /* LBR filter not supported */
+#define LBR_IGN                0       /* ignored */
+
+#define LBR_ANY                 \
+       (LBR_JCC        |\
+        LBR_REL_CALL   |\
+        LBR_IND_CALL   |\
+        LBR_RETURN     |\
+        LBR_REL_JMP    |\
+        LBR_IND_JMP    |\
+        LBR_FAR)
+
+#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
+
+#define for_each_branch_sample_type(x) \
+       for ((x) = PERF_SAMPLE_BRANCH_USER; \
+            (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
+/*
+ * x86 control flow change classification
+ * x86 control flow changes include branches, interrupts, traps, faults
+ */
+enum {
+       X86_BR_NONE     = 0,      /* unknown */
+
+       X86_BR_USER     = 1 << 0, /* branch target is user */
+       X86_BR_KERNEL   = 1 << 1, /* branch target is kernel */
+
+       X86_BR_CALL     = 1 << 2, /* call */
+       X86_BR_RET      = 1 << 3, /* return */
+       X86_BR_SYSCALL  = 1 << 4, /* syscall */
+       X86_BR_SYSRET   = 1 << 5, /* syscall return */
+       X86_BR_INT      = 1 << 6, /* sw interrupt */
+       X86_BR_IRET     = 1 << 7, /* return from interrupt */
+       X86_BR_JCC      = 1 << 8, /* conditional */
+       X86_BR_JMP      = 1 << 9, /* jump */
+       X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
+       X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+
+#define X86_BR_ANY       \
+       (X86_BR_CALL    |\
+        X86_BR_RET     |\
+        X86_BR_SYSCALL |\
+        X86_BR_SYSRET  |\
+        X86_BR_INT     |\
+        X86_BR_IRET    |\
+        X86_BR_JCC     |\
+        X86_BR_JMP      |\
+        X86_BR_IRQ      |\
+        X86_BR_IND_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL                 \
+       (X86_BR_CALL            |\
+        X86_BR_IND_CALL        |\
+        X86_BR_SYSCALL         |\
+        X86_BR_IRQ             |\
+        X86_BR_INT)
+
+static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+
+/*
  * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
  * otherwise it becomes near impossible to get a reliable stack.
  */
@@ -21,6 +116,10 @@ enum {
 static void __intel_pmu_lbr_enable(void)
 {
        u64 debugctl;
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+       if (cpuc->lbr_sel)
+               wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
 
        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
@@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event)
         * Reset the LBR stack if we changed task context to
         * avoid data leaks.
         */
-
        if (event->ctx->task && cpuc->lbr_context != event->ctx) {
                intel_pmu_lbr_reset();
                cpuc->lbr_context = event->ctx;
        }
+       cpuc->br_sel = event->hw.branch_reg.reg;
 
        cpuc->lbr_users++;
 }
@@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);
 
-       if (cpuc->enabled && !cpuc->lbr_users)
+       if (cpuc->enabled && !cpuc->lbr_users) {
                __intel_pmu_lbr_disable();
+               /* avoid stale pointer */
+               cpuc->lbr_context = NULL;
+       }
 }
 
 void intel_pmu_lbr_enable_all(void)
@@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void)
                __intel_pmu_lbr_disable();
 }
 
+/*
+ * TOS = most recently recorded branch
+ */
 static inline u64 intel_pmu_lbr_tos(void)
 {
        u64 tos;
@@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 
                rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
 
-               cpuc->lbr_entries[i].from  = msr_lastbranch.from;
-               cpuc->lbr_entries[i].to    = msr_lastbranch.to;
-               cpuc->lbr_entries[i].flags = 0;
+               cpuc->lbr_entries[i].from       = msr_lastbranch.from;
+               cpuc->lbr_entries[i].to         = msr_lastbranch.to;
+               cpuc->lbr_entries[i].mispred    = 0;
+               cpuc->lbr_entries[i].predicted  = 0;
+               cpuc->lbr_entries[i].reserved   = 0;
        }
        cpuc->lbr_stack.nr = i;
 }
 
-#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
-
 /*
  * Due to lack of segmentation in Linux the effective address (offset)
  * is the same as the linear address, allowing us to merge the LIP and EIP
@@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 
        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                unsigned long lbr_idx = (tos - i) & mask;
-               u64 from, to, flags = 0;
+               u64 from, to, mis = 0, pred = 0;
 
                rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
                rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
 
                if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
-                       flags = !!(from & LBR_FROM_FLAG_MISPRED);
+                       mis = !!(from & LBR_FROM_FLAG_MISPRED);
+                       pred = !mis;
                        from = (u64)((((s64)from) << 1) >> 1);
                }
 
-               cpuc->lbr_entries[i].from  = from;
-               cpuc->lbr_entries[i].to    = to;
-               cpuc->lbr_entries[i].flags = flags;
+               cpuc->lbr_entries[i].from       = from;
+               cpuc->lbr_entries[i].to         = to;
+               cpuc->lbr_entries[i].mispred    = mis;
+               cpuc->lbr_entries[i].predicted  = pred;
+               cpuc->lbr_entries[i].reserved   = 0;
        }
        cpuc->lbr_stack.nr = i;
 }
@@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void)
                intel_pmu_lbr_read_32(cpuc);
        else
                intel_pmu_lbr_read_64(cpuc);
+
+       intel_pmu_lbr_filter(cpuc);
+}
+
+/*
+ * SW filter is used:
+ * - in case there is no HW filter
+ * - in case the HW filter has errata or limitations
+ */
+static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+{
+       u64 br_type = event->attr.branch_sample_type;
+       int mask = 0;
+
+       if (br_type & PERF_SAMPLE_BRANCH_USER)
+               mask |= X86_BR_USER;
+
+       if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+               mask |= X86_BR_KERNEL;
+
+       /* we ignore BRANCH_HV here */
+
+       if (br_type & PERF_SAMPLE_BRANCH_ANY)
+               mask |= X86_BR_ANY;
+
+       if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+               mask |= X86_BR_ANY_CALL;
+
+       if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+               mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+
+       if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+               mask |= X86_BR_IND_CALL;
+       /*
+        * stash actual user request into reg, it may
+        * be used by fixup code for some CPU
+        */
+       event->hw.branch_reg.reg = mask;
+}
+
+/*
+ * setup the HW LBR filter
+ * Used only when available, may not be enough to disambiguate
+ * all branches, may need the help of the SW filter
+ */
+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg;
+       u64 br_type = event->attr.branch_sample_type;
+       u64 mask = 0, m;
+       u64 v;
+
+       for_each_branch_sample_type(m) {
+               if (!(br_type & m))
+                       continue;
+
+               v = x86_pmu.lbr_sel_map[m];
+               if (v == LBR_NOT_SUPP)
+                       return -EOPNOTSUPP;
+
+               if (v != LBR_IGN)
+                       mask |= v;
+       }
+       reg = &event->hw.branch_reg;
+       reg->idx = EXTRA_REG_LBR;
+
+       /* LBR_SELECT operates in suppress mode so invert mask */
+       reg->config = ~mask & x86_pmu.lbr_sel_mask;
+
+       return 0;
+}
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event)
+{
+       int ret = 0;
+
+       /*
+        * no LBR on this PMU
+        */
+       if (!x86_pmu.lbr_nr)
+               return -EOPNOTSUPP;
+
+       /*
+        * setup SW LBR filter
+        */
+       intel_pmu_setup_sw_lbr_filter(event);
+
+       /*
+        * setup HW LBR filter, if any
+        */
+       if (x86_pmu.lbr_sel_map)
+               ret = intel_pmu_setup_hw_lbr_filter(event);
+
+       return ret;
 }
 
+/*
+ * return the type of control flow change at address "from"
+ * instruction is not necessarily a branch (in case of interrupt).
+ *
+ * The branch type returned also includes the priv level of the
+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ *
+ * If a branch type is unknown OR the instruction cannot be
+ * decoded (e.g., text page not present), then X86_BR_NONE is
+ * returned.
+ */
+static int branch_type(unsigned long from, unsigned long to)
+{
+       struct insn insn;
+       void *addr;
+       int bytes, size = MAX_INSN_SIZE;
+       int ret = X86_BR_NONE;
+       int ext, to_plm, from_plm;
+       u8 buf[MAX_INSN_SIZE];
+       int is64 = 0;
+
+       to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+       from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+
+       /*
+        * may be zero if lbr did not fill up after a reset by the time
+        * we get a PMU interrupt
+        */
+       if (from == 0 || to == 0)
+               return X86_BR_NONE;
+
+       if (from_plm == X86_BR_USER) {
+               /*
+                * can happen if measuring at the user level only
+                * and we interrupt in a kernel thread, e.g., idle.
+                */
+               if (!current->mm)
+                       return X86_BR_NONE;
+
+               /* may fail if text not present */
+               bytes = copy_from_user_nmi(buf, (void __user *)from, size);
+               if (bytes != size)
+                       return X86_BR_NONE;
+
+               addr = buf;
+       } else
+               addr = (void *)from;
+
+       /*
+        * decoder needs to know the ABI especially
+        * on 64-bit systems running 32-bit apps
+        */
+#ifdef CONFIG_X86_64
+       is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
+#endif
+       insn_init(&insn, addr, is64);
+       insn_get_opcode(&insn);
+
+       switch (insn.opcode.bytes[0]) {
+       case 0xf:
+               switch (insn.opcode.bytes[1]) {
+               case 0x05: /* syscall */
+               case 0x34: /* sysenter */
+                       ret = X86_BR_SYSCALL;
+                       break;
+               case 0x07: /* sysret */
+               case 0x35: /* sysexit */
+                       ret = X86_BR_SYSRET;
+                       break;
+               case 0x80 ... 0x8f: /* conditional */
+                       ret = X86_BR_JCC;
+                       break;
+               default:
+                       ret = X86_BR_NONE;
+               }
+               break;
+       case 0x70 ... 0x7f: /* conditional */
+               ret = X86_BR_JCC;
+               break;
+       case 0xc2: /* near ret */
+       case 0xc3: /* near ret */
+       case 0xca: /* far ret */
+       case 0xcb: /* far ret */
+               ret = X86_BR_RET;
+               break;
+       case 0xcf: /* iret */
+               ret = X86_BR_IRET;
+               break;
+       case 0xcc ... 0xce: /* int */
+               ret = X86_BR_INT;
+               break;
+       case 0xe8: /* call near rel */
+       case 0x9a: /* call far absolute */
+               ret = X86_BR_CALL;
+               break;
+       case 0xe0 ... 0xe3: /* loop jmp */
+               ret = X86_BR_JCC;
+               break;
+       case 0xe9 ... 0xeb: /* jmp */
+               ret = X86_BR_JMP;
+               break;
+       case 0xff: /* call near absolute, call far absolute ind */
+               insn_get_modrm(&insn);
+               ext = (insn.modrm.bytes[0] >> 3) & 0x7;
+               switch (ext) {
+               case 2: /* near ind call */
+               case 3: /* far ind call */
+                       ret = X86_BR_IND_CALL;
+                       break;
+               case 4:
+               case 5:
+                       ret = X86_BR_JMP;
+                       break;
+               }
+               break;
+       default:
+               ret = X86_BR_NONE;
+       }
+       /*
+        * interrupts, traps, faults (and thus ring transition) may
+        * occur on any instructions. Thus, to classify them correctly,
+        * we need to first look at the from and to priv levels. If they
+        * are different and to is in the kernel, then it indicates
+        * a ring transition. If the from instruction is not a ring
+        * transition instr (syscall, sysenter, int), then it means
+        * it was an irq, trap or fault.
+        *
+        * we have no way of detecting kernel to kernel faults.
+        */
+       if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+           && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+               ret = X86_BR_IRQ;
+
+       /*
+        * branch priv level determined by target as
+        * is done by HW when LBR_SELECT is implemented
+        */
+       if (ret != X86_BR_NONE)
+               ret |= to_plm;
+
+       return ret;
+}
+
+/*
+ * implement actual branch filter based on user demand.
+ * Hardware may not exactly satisfy that request, thus
+ * we need to inspect opcodes. Mismatched branches are
+ * discarded. Therefore, the number of branches returned
+ * in PERF_SAMPLE_BRANCH_STACK sample may vary.
+ */
+static void
+intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
+{
+       u64 from, to;
+       int br_sel = cpuc->br_sel;
+       int i, j, type;
+       bool compress = false;
+
+       /* if sampling all branches, then nothing to filter */
+       if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+               return;
+
+       for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+
+               from = cpuc->lbr_entries[i].from;
+               to = cpuc->lbr_entries[i].to;
+
+               type = branch_type(from, to);
+
+               /* if type does not correspond, then discard */
+               if (type == X86_BR_NONE || (br_sel & type) != type) {
+                       cpuc->lbr_entries[i].from = 0;
+                       compress = true;
+               }
+       }
+
+       if (!compress)
+               return;
+
+       /* remove all entries with from=0 */
+       for (i = 0; i < cpuc->lbr_stack.nr; ) {
+               if (!cpuc->lbr_entries[i].from) {
+                       j = i;
+                       while (++j < cpuc->lbr_stack.nr)
+                               cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+                       cpuc->lbr_stack.nr--;
+                       if (!cpuc->lbr_entries[i].from)
+                               continue;
+               }
+               i++;
+       }
+}
+
+/*
+ * Map interface branch filters onto LBR filters
+ */
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+       [PERF_SAMPLE_BRANCH_ANY]        = LBR_ANY,
+       [PERF_SAMPLE_BRANCH_USER]       = LBR_USER,
+       [PERF_SAMPLE_BRANCH_KERNEL]     = LBR_KERNEL,
+       [PERF_SAMPLE_BRANCH_HV]         = LBR_IGN,
+       [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
+                                       | LBR_IND_JMP | LBR_FAR,
+       /*
+        * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
+        */
+       [PERF_SAMPLE_BRANCH_ANY_CALL] =
+        LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+       /*
+        * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
+        */
+       [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
+};
+
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+       [PERF_SAMPLE_BRANCH_ANY]        = LBR_ANY,
+       [PERF_SAMPLE_BRANCH_USER]       = LBR_USER,
+       [PERF_SAMPLE_BRANCH_KERNEL]     = LBR_KERNEL,
+       [PERF_SAMPLE_BRANCH_HV]         = LBR_IGN,
+       [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_ANY_CALL]   = LBR_REL_CALL | LBR_IND_CALL
+                                       | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_IND_CALL]   = LBR_IND_CALL,
+};
+
+/* core */
 void intel_pmu_lbr_init_core(void)
 {
        x86_pmu.lbr_nr     = 4;
-       x86_pmu.lbr_tos    = 0x01c9;
-       x86_pmu.lbr_from   = 0x40;
-       x86_pmu.lbr_to     = 0x60;
+       x86_pmu.lbr_tos    = MSR_LBR_TOS;
+       x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+       x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+       /*
+        * SW branch filter usage:
+        * - compensate for lack of HW filter
+        */
+       pr_cont("4-deep LBR, ");
 }
 
+/* nehalem/westmere */
 void intel_pmu_lbr_init_nhm(void)
 {
        x86_pmu.lbr_nr     = 16;
-       x86_pmu.lbr_tos    = 0x01c9;
-       x86_pmu.lbr_from   = 0x680;
-       x86_pmu.lbr_to     = 0x6c0;
+       x86_pmu.lbr_tos    = MSR_LBR_TOS;
+       x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+       /*
+        * SW branch filter usage:
+        * - workaround LBR_SEL errata (see above)
+        * - support syscall, sysret capture.
+        *   That requires LBR_FAR but that means far
+        *   jmp need to be filtered out
+        */
+       pr_cont("16-deep LBR, ");
+}
+
+/* sandy bridge */
+void intel_pmu_lbr_init_snb(void)
+{
+       x86_pmu.lbr_nr   = 16;
+       x86_pmu.lbr_tos  = MSR_LBR_TOS;
+       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+
+       /*
+        * SW branch filter usage:
+        * - support syscall, sysret capture.
+        *   That requires LBR_FAR but that means far
+        *   jmp need to be filtered out
+        */
+       pr_cont("16-deep LBR, ");
 }
 
+/* atom */
 void intel_pmu_lbr_init_atom(void)
 {
+       /*
+        * only models starting at stepping 10 seem
+        * to have an operational LBR which can freeze
+        * on PMU interrupt
+        */
+       if (boot_cpu_data.x86_mask < 10) {
+               pr_cont("LBR disabled due to erratum");
+               return;
+       }
+
        x86_pmu.lbr_nr     = 8;
-       x86_pmu.lbr_tos    = 0x01c9;
-       x86_pmu.lbr_from   = 0x40;
-       x86_pmu.lbr_to     = 0x60;
+       x86_pmu.lbr_tos    = MSR_LBR_TOS;
+       x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+       x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+       /*
+        * SW branch filter usage:
+        * - compensate for lack of HW filter
+        */
+       pr_cont("8-deep LBR, ");
 }
index 40fc861..58b7f27 100644 (file)
@@ -100,13 +100,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
        irqctx->tinfo.task = curctx->tinfo.task;
        irqctx->tinfo.previous_esp = current_stack_pointer;
 
-       /*
-        * Copy the softirq bits in preempt_count so that the
-        * softirq checks work in the hardirq context.
-        */
-       irqctx->tinfo.preempt_count =
-               (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
-               (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+       /* Copy the preempt_count so that the [soft]irq checks work. */
+       irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
 
        if (unlikely(overflow))
                call_on_stack(print_stack_overflow, isp);
@@ -196,7 +191,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
        if (unlikely(!desc))
                return false;
 
-       if (!execute_on_irq_stack(overflow, desc, irq)) {
+       if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
                if (unlikely(overflow))
                        print_stack_overflow();
                desc->handle_irq(irq, desc);
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h
new file mode 100644 (file)
index 0000000..3230b68
--- /dev/null
@@ -0,0 +1,102 @@
+#ifndef __X86_KERNEL_KPROBES_COMMON_H
+#define __X86_KERNEL_KPROBES_COMMON_H
+
+/* Kprobes and Optprobes common header */
+
+#ifdef CONFIG_X86_64
+#define SAVE_REGS_STRING                       \
+       /* Skip cs, ip, orig_ax. */             \
+       "       subq $24, %rsp\n"               \
+       "       pushq %rdi\n"                   \
+       "       pushq %rsi\n"                   \
+       "       pushq %rdx\n"                   \
+       "       pushq %rcx\n"                   \
+       "       pushq %rax\n"                   \
+       "       pushq %r8\n"                    \
+       "       pushq %r9\n"                    \
+       "       pushq %r10\n"                   \
+       "       pushq %r11\n"                   \
+       "       pushq %rbx\n"                   \
+       "       pushq %rbp\n"                   \
+       "       pushq %r12\n"                   \
+       "       pushq %r13\n"                   \
+       "       pushq %r14\n"                   \
+       "       pushq %r15\n"
+#define RESTORE_REGS_STRING                    \
+       "       popq %r15\n"                    \
+       "       popq %r14\n"                    \
+       "       popq %r13\n"                    \
+       "       popq %r12\n"                    \
+       "       popq %rbp\n"                    \
+       "       popq %rbx\n"                    \
+       "       popq %r11\n"                    \
+       "       popq %r10\n"                    \
+       "       popq %r9\n"                     \
+       "       popq %r8\n"                     \
+       "       popq %rax\n"                    \
+       "       popq %rcx\n"                    \
+       "       popq %rdx\n"                    \
+       "       popq %rsi\n"                    \
+       "       popq %rdi\n"                    \
+       /* Skip orig_ax, ip, cs */              \
+       "       addq $24, %rsp\n"
+#else
+#define SAVE_REGS_STRING                       \
+       /* Skip cs, ip, orig_ax and gs. */      \
+       "       subl $16, %esp\n"               \
+       "       pushl %fs\n"                    \
+       "       pushl %es\n"                    \
+       "       pushl %ds\n"                    \
+       "       pushl %eax\n"                   \
+       "       pushl %ebp\n"                   \
+       "       pushl %edi\n"                   \
+       "       pushl %esi\n"                   \
+       "       pushl %edx\n"                   \
+       "       pushl %ecx\n"                   \
+       "       pushl %ebx\n"
+#define RESTORE_REGS_STRING                    \
+       "       popl %ebx\n"                    \
+       "       popl %ecx\n"                    \
+       "       popl %edx\n"                    \
+       "       popl %esi\n"                    \
+       "       popl %edi\n"                    \
+       "       popl %ebp\n"                    \
+       "       popl %eax\n"                    \
+       /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
+       "       addl $24, %esp\n"
+#endif
+
+/* Check whether the instruction is boostable */
+extern int can_boost(kprobe_opcode_t *instruction);
+/* Recover instruction if given address is probed */
+extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
+                                        unsigned long addr);
+/*
+ * Copy an instruction and adjust the displacement if the instruction
+ * uses the %rip-relative addressing mode.
+ */
+extern int __copy_instruction(u8 *dest, u8 *src);
+
+/* Generate a relative-jump/call instruction */
+extern void synthesize_reljump(void *from, void *to);
+extern void synthesize_relcall(void *from, void *to);
+
+#ifdef CONFIG_OPTPROBES
+extern int arch_init_optprobes(void);
+extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
+extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr);
+#else  /* !CONFIG_OPTPROBES */
+static inline int arch_init_optprobes(void)
+{
+       return 0;
+}
+static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+{
+       return 0;
+}
+static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
+{
+       return addr;
+}
+#endif
+#endif
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c
new file mode 100644 (file)
index 0000000..c5e410e
--- /dev/null
@@ -0,0 +1,512 @@
+/*
+ *  Kernel Probes Jump Optimization (Optprobes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) Hitachi Ltd., 2012
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/kallsyms.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/alternative.h>
+#include <asm/insn.h>
+#include <asm/debugreg.h>
+
+#include "kprobes-common.h"
+
+/*
+ * Recover the original instruction bytes at @addr when they may have been
+ * overwritten by the relative jump of a jump-optimized kprobe.
+ *
+ * @buf:  scratch buffer holding at least MAX_INSN_SIZE opcodes
+ * @addr: text address to inspect
+ *
+ * Returns @addr unchanged (and leaves @buf untouched) when no completely
+ * optimized kprobe is registered at @addr or within the
+ * RELATIVEJUMP_SIZE - 1 bytes before it.  Otherwise @buf is filled with
+ * the recovered bytes and the address of @buf is returned.
+ */
+unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
+{
+       struct optimized_kprobe *hit = NULL;
+       unsigned long probe_addr;
+       int back;
+
+       /* The jump is RELATIVEJUMP_SIZE bytes long, so look back that far */
+       for (back = 0; back < RELATIVEJUMP_SIZE; back++) {
+               struct kprobe *probe = get_kprobe((void *)addr - back);
+               struct optimized_kprobe *cand;
+
+               /* Only jump-optimized kprobes are handled here */
+               if (!probe || !kprobe_optimized(probe))
+                       continue;
+
+               cand = container_of(probe, struct optimized_kprobe, kp);
+               /* A non-empty list means the probe is still being optimized */
+               if (list_empty(&cand->list)) {
+                       hit = cand;
+                       break;
+               }
+       }
+
+       if (!hit)
+               return addr;
+
+       /*
+        * The bytes following the probed opcode have been replaced by the
+        * jump displacement, so the originals must be taken from
+        * optinsn.copied_insn.  Start from the current text and patch the
+        * saved bytes over it.
+        */
+       memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+       probe_addr = (unsigned long)hit->kp.addr;
+       if (addr == probe_addr) {
+               /* Probe address itself: saved opcode, then the saved bytes */
+               buf[0] = hit->kp.opcode;
+               memcpy(buf + 1, hit->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+       } else {
+               /* Inside the displacement: only the tail of the saved bytes */
+               long skip = addr - probe_addr - 1;
+
+               memcpy(buf, hit->optinsn.copied_insn + skip, RELATIVE_ADDR_SIZE - skip);
+       }
+
+       return (unsigned long)buf;
+}
+
+/*
+ * Write, at @addr, a move instruction that loads the constant @val into
+ * the first-argument register (rdi on x86-64, eax on x86-32).
+ */
+static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
+{
+       int n = 0;
+
+#ifdef CONFIG_X86_64
+       addr[n++] = 0x48;
+       addr[n++] = 0xbf;
+#else
+       addr[n++] = 0xb8;
+#endif
+       /* The immediate operand follows the opcode byte(s) directly */
+       *(unsigned long *)(addr + n) = val;
+}
+
+/*
+ * Container for the optprobe detour template.
+ *
+ * The asm below defines the global labels optprobe_template_entry,
+ * optprobe_template_val, optprobe_template_call and optprobe_template_end.
+ * arch_prepare_optimized_kprobe() copies the bytes between the entry and
+ * end labels into an out-of-line slot, then overwrites the NOP padding at
+ * the val label (via synthesize_set_arg1()) and the padding at the call
+ * label (via synthesize_relcall() targeting optimized_callback()).
+ */
+static void __used __kprobes kprobes_optinsn_template_holder(void)
+{
+       asm volatile (
+                       ".global optprobe_template_entry\n"
+                       "optprobe_template_entry:\n"
+#ifdef CONFIG_X86_64
+                       /* We don't bother saving the ss register */
+                       "       pushq %rsp\n"
+                       "       pushfq\n"
+                       SAVE_REGS_STRING
+                       /* presumably the regs argument of the callback */
+                       "       movq %rsp, %rsi\n"
+                       ".global optprobe_template_val\n"
+                       "optprobe_template_val:\n"
+                       /* Padding for the move written by synthesize_set_arg1() */
+                       ASM_NOP5
+                       ASM_NOP5
+                       ".global optprobe_template_call\n"
+                       "optprobe_template_call:\n"
+                       /* Padding for the call written by synthesize_relcall() */
+                       ASM_NOP5
+                       /*
+                        * Move flags to rsp: copy the saved flags into the
+                        * slot above them.  NOTE(review): the 144/152 offsets
+                        * assume SAVE_REGS_STRING reserves 24 bytes and pushes
+                        * 15 registers - confirm against its definition.
+                        */
+                       "       movq 144(%rsp), %rdx\n"
+                       "       movq %rdx, 152(%rsp)\n"
+                       RESTORE_REGS_STRING
+                       /* Skip flags entry */
+                       "       addq $8, %rsp\n"
+                       "       popfq\n"
+#else /* CONFIG_X86_32 */
+                       "       pushf\n"
+                       SAVE_REGS_STRING
+                       /* presumably the regs argument of the callback */
+                       "       movl %esp, %edx\n"
+                       ".global optprobe_template_val\n"
+                       "optprobe_template_val:\n"
+                       /* Padding for the move written by synthesize_set_arg1() */
+                       ASM_NOP5
+                       ".global optprobe_template_call\n"
+                       "optprobe_template_call:\n"
+                       /* Padding for the call written by synthesize_relcall() */
+                       ASM_NOP5
+                       RESTORE_REGS_STRING
+                       "       addl $4, %esp\n"        /* skip cs */
+                       "       popf\n"
+#endif
+                       ".global optprobe_template_end\n"
+                       "optprobe_template_end:\n");
+}
+
+/*
+ * Byte offsets of the two patch points and of the end of the template,
+ * each measured from the optprobe_template_entry label.
+ */
+#define TMPL_MOVE_IDX \
+       ((long)&optprobe_template_val - (long)&optprobe_template_entry)
+#define TMPL_CALL_IDX \
+       ((long)&optprobe_template_call - (long)&optprobe_template_entry)
+#define TMPL_END_IDX \
+       ((long)&optprobe_template_end - (long)&optprobe_template_entry)
+
+/* Size of the breakpoint instruction: a single kprobe_opcode_t */
+#define INT3_SIZE sizeof(kprobe_opcode_t)
+
+/*
+ * Callback of an optimized kprobe; called from the detour code (optinsn).
+ *
+ * @op:   the optimized kprobe that was hit
+ * @regs: register snapshot saved by the detour template
+ *
+ * Returns immediately for a disabled probe.  If another kprobe is already
+ * running on this CPU the hit is only counted as missed; otherwise the
+ * register fields the template skipped are filled in and the pre-handler
+ * is run with interrupts disabled.
+ */
+static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+       struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+       struct kprobe *probe = &op->kp;
+       unsigned long flags;
+
+       /* This is possible if op is under delayed unoptimizing */
+       if (kprobe_disabled(probe))
+               return;
+
+       local_irq_save(flags);
+       if (!kprobe_running()) {
+               /* Fill in the registers the template did not save */
+#ifdef CONFIG_X86_64
+               regs->cs = __KERNEL_CS;
+#else
+               regs->cs = __KERNEL_CS | get_kernel_rpl();
+               regs->gs = 0;
+#endif
+               regs->ip = (unsigned long)probe->addr + INT3_SIZE;
+               regs->orig_ax = ~0UL;
+
+               __this_cpu_write(current_kprobe, probe);
+               kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+               opt_pre_handler(probe, regs);
+               __this_cpu_write(current_kprobe, NULL);
+       } else {
+               /* A kprobe is already being handled: count this hit as missed */
+               kprobes_inc_nmissed_count(probe);
+       }
+       local_irq_restore(flags);
+}
+
+/*
+ * Copy whole instructions from @src to @dest until at least
+ * RELATIVEJUMP_SIZE bytes have been copied.
+ *
+ * Returns the number of bytes copied, -EINVAL if an instruction cannot be
+ * copied or is not boostable, or -EBUSY if the copied range is reserved
+ * by ftrace, alternatives or jump labels.
+ */
+static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
+{
+       int copied, n;
+       u8 *last;
+
+       for (copied = 0; copied < RELATIVEJUMP_SIZE; copied += n) {
+               n = __copy_instruction(dest + copied, src + copied);
+               if (!n || !can_boost(dest + copied))
+                       return -EINVAL;
+       }
+
+       /* Check whether the address range is reserved */
+       last = src + copied - 1;
+       if (ftrace_text_reserved(src, last))
+               return -EBUSY;
+       if (alternatives_text_reserved(src, last))
+               return -EBUSY;
+       if (jump_label_text_reserved(src, last))
+               return -EBUSY;
+
+       return copied;
+}
+
+/*
+ * Check whether insn is an indirect jump.
+ * Returns 1 for opcode 0xea (segment based jump) and for opcode 0xff
+ * whose ModRM reg field is 4 or 5 (jump), 0 otherwise.
+ */
+static int __kprobes insn_is_indirect_jump(struct insn *insn)
+{
+       int opcode = insn->opcode.bytes[0];
+
+       if (opcode == 0xea)     /* Segment based jump */
+               return 1;
+       if (opcode != 0xff)
+               return 0;
+
+       /* Jump */
+       return (X86_MODRM_REG(insn->modrm.value) & 6) == 4;
+}
+
+/*
+ * Check whether insn is a relative jump whose target lies in
+ * [start, start + len] (both ends included).
+ * Returns 0 for anything that is not a relative jump.
+ */
+static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
+{
+       int opcode = insn->opcode.bytes[0];
+       unsigned long target;
+       int relative;
+
+       if (opcode == 0x0f) {
+               /* jcc near */
+               relative = (insn->opcode.bytes[1] & 0xf0) == 0x80;
+       } else {
+               relative = (opcode >= 0xe0 && opcode <= 0xe3) || /* loopne, loope, loop, jcxz */
+                          opcode == 0xe9 ||                    /* near relative jump */
+                          opcode == 0xeb ||                    /* short relative jump */
+                          (opcode & 0xf0) == 0x70;             /* jcc short */
+       }
+       if (!relative)
+               return 0;
+
+       /* The displacement is relative to the end of the instruction */
+       target = (unsigned long)insn->next_byte + insn->immediate.value;
+
+       return (start <= target && target <= start + len);
+}
+
+/*
+ * Decide whether the probe at @paddr may be replaced by a relative jump.
+ *
+ * The whole function containing @paddr is decoded to make sure that no
+ * instruction can jump into the bytes the jump displacement will occupy.
+ * Returns 1 if the probe can be optimized, 0 otherwise.
+ */
+static int __kprobes can_optimize(unsigned long paddr)
+{
+       unsigned long func_start, func_end, cur;
+       unsigned long size = 0, offset = 0;
+       kprobe_opcode_t buf[MAX_INSN_SIZE];
+       struct insn insn;
+
+       /* Look up the symbol that contains paddr */
+       if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
+               return 0;
+
+       /*
+        * Do not optimize in the entry code due to the unstable
+        * stack handling.
+        */
+       if (paddr >= (unsigned long)__entry_text_start &&
+           paddr < (unsigned long)__entry_text_end)
+               return 0;
+
+       /* Check there is enough space for a relative jump. */
+       if (size - offset < RELATIVEJUMP_SIZE)
+               return 0;
+
+       /* Decode every instruction from the function start to its end */
+       func_start = paddr - offset;
+       func_end = func_start + size;
+       for (cur = func_start; cur < func_end; cur += insn.length) {
+               /*
+                * Fixup code may jump into this function, so a function
+                * with exception table entries cannot be optimized.
+                */
+               if (search_exception_tables(cur))
+                       return 0;
+
+               kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, cur));
+               insn_get_length(&insn);
+               /* Another subsystem puts a breakpoint */
+               if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+                       return 0;
+
+               /* Decoding may have used buf; restore the real addresses */
+               insn.kaddr = (void *)cur;
+               insn.next_byte = (void *)(cur + insn.length);
+
+               /* No instruction may jump into the bytes being replaced */
+               if (insn_is_indirect_jump(&insn))
+                       return 0;
+               if (insn_jump_into_range(&insn, paddr + INT3_SIZE, RELATIVE_ADDR_SIZE))
+                       return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Check that optimized_kprobe can actually be optimized.
+ * Returns -EEXIST if an enabled kprobe sits on any byte after the first
+ * one of the copied instruction range, 0 otherwise.
+ */
+int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+       int off;
+
+       for (off = 1; off < op->optinsn.size; off++) {
+               struct kprobe *other = get_kprobe(op->kp.addr + off);
+
+               if (!other || kprobe_disabled(other))
+                       continue;
+               return -EEXIST;
+       }
+
+       return 0;
+}
+
+/*
+ * Check whether addr is within the optimized instructions, i.e. in
+ * [op->kp.addr, op->kp.addr + op->optinsn.size).
+ */
+int __kprobes
+arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
+{
+       unsigned long begin = (unsigned long)op->kp.addr;
+
+       if (addr < begin)
+               return 0;
+
+       return addr < begin + op->optinsn.size;
+}
+
+/*
+ * Free the optimized instruction slot of @op, if it has one, and reset
+ * the slot pointer and size.  @dirty is handed to free_optinsn_slot().
+ */
+static __kprobes
+void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+       if (!op->optinsn.insn)
+               return;
+
+       free_optinsn_slot(op->optinsn.insn, dirty);
+       op->optinsn.insn = NULL;
+       op->optinsn.size = 0;
+}
+
+void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+       /* Release op's out-of-line slot (if any), passing dirty = 1 */
+       __arch_remove_optimized_kprobe(op, 1);
+}
+
+/*
+ * Copy replacing target instructions
+ * Target instructions MUST be relocatable (checked inside)
+ * This is called when new aggr(opt)probe is allocated or reused.
+ *
+ * Builds the out-of-line detour buffer for @op: a copy of the template,
+ * patched with the probe pointer and the call to optimized_callback(),
+ * followed by the copied original instructions and a jump back.
+ *
+ * Returns 0 on success, -EILSEQ if the probe cannot be optimized,
+ * -ENOMEM if no slot is available, -ERANGE if the slot cannot be reached
+ * by a relative jump, or the error from copy_optimized_instructions().
+ * On every failure the slot is released again.
+ */
+int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+{
+       u8 *buf;
+       int ret;
+       long rel;
+
+       if (!can_optimize((unsigned long)op->kp.addr))
+               return -EILSEQ;
+
+       op->optinsn.insn = get_optinsn_slot();
+       if (!op->optinsn.insn)
+               return -ENOMEM;
+
+       /*
+        * Verify if the address gap is in 2GB range, because this uses
+        * a relative jump.  The displacement is measured from the end of
+        * the jump, exactly as setup_optimize_kprobe() computes it.
+        */
+       rel = (long)op->optinsn.insn - ((long)op->kp.addr + RELATIVEJUMP_SIZE);
+       if (abs(rel) > 0x7fffffff) {
+               /* Do not leak the slot that was just allocated */
+               __arch_remove_optimized_kprobe(op, 0);
+               return -ERANGE;
+       }
+
+       buf = (u8 *)op->optinsn.insn;
+
+       /* Copy instructions into the out-of-line buffer */
+       ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
+       if (ret < 0) {
+               __arch_remove_optimized_kprobe(op, 0);
+               return ret;
+       }
+       op->optinsn.size = ret;
+
+       /* Copy arch-dep-instance from template */
+       memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
+
+       /* Set probe information */
+       synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
+
+       /* Set probe function call */
+       synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
+
+       /* Set returning jmp instruction at the tail of out-of-line buffer */
+       synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
+                          (u8 *)op->kp.addr + op->optinsn.size);
+
+       flush_icache_range((unsigned long) buf,
+                          (unsigned long) buf + TMPL_END_IDX +
+                          op->optinsn.size + RELATIVEJUMP_SIZE);
+       return 0;
+}
+
+/* Upper bound on the probes handled by one arch_(un)optimize_kprobes() call */
+#define MAX_OPTIMIZE_PROBES 256
+/* Batch handed to text_poke_smp_batch(); allocated in arch_init_optprobes() */
+static struct text_poke_param *jump_poke_params;
+/* Instruction bytes that the matching jump_poke_params[] entry points at */
+static struct jump_poke_buffer {
+       u8 buf[RELATIVEJUMP_SIZE];
+} *jump_poke_bufs;
+
+/*
+ * Fill @tprm and @insn_buf with the relative jump that will replace the
+ * breakpoint of @op, after saving the bytes the displacement overwrites
+ * into op->optinsn.copied_insn.
+ */
+static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
+                                           u8 *insn_buf,
+                                           struct optimized_kprobe *op)
+{
+       long jump_end = (long)op->kp.addr + RELATIVEJUMP_SIZE;
+       s32 disp = (s32)((long)op->optinsn.insn - jump_end);
+
+       /* Backup instructions which will be replaced by jump address */
+       memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, RELATIVE_ADDR_SIZE);
+
+       /* Jump opcode followed by its 32-bit displacement */
+       insn_buf[0] = RELATIVEJUMP_OPCODE;
+       *(s32 *)(insn_buf + 1) = disp;
+
+       tprm->addr = op->kp.addr;
+       tprm->opcode = insn_buf;
+       tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Replace breakpoints (int3) with relative jumps.
+ * Caller must call with locking kprobe_mutex and text_mutex.
+ *
+ * At most MAX_OPTIMIZE_PROBES entries are taken off @oplist per call;
+ * any remaining entries stay on the list.
+ */
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+       struct optimized_kprobe *op, *next;
+       int queued = 0;
+
+       list_for_each_entry_safe(op, next, oplist, list) {
+               WARN_ON(kprobe_disabled(&op->kp));
+               /* Set up the poke parameters for this probe */
+               setup_optimize_kprobe(&jump_poke_params[queued],
+                                     jump_poke_bufs[queued].buf, op);
+               list_del_init(&op->list);
+               queued++;
+               if (queued >= MAX_OPTIMIZE_PROBES)
+                       break;
+       }
+
+       /*
+        * text_poke_smp doesn't support NMI/MCE code modifying.
+        * However, since kprobes itself also doesn't support NMI/MCE
+        * code probing, it's not a problem.
+        */
+       text_poke_smp_batch(jump_poke_params, queued);
+}
+
+/*
+ * Fill @tprm and @insn_buf with the bytes that turn the relative jump of
+ * @op back into a breakpoint followed by the saved original bytes.
+ */
+static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
+                                             u8 *insn_buf,
+                                             struct optimized_kprobe *op)
+{
+       /* Saved original bytes go behind the first byte ... */
+       memcpy(&insn_buf[1], op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+       /* ... which becomes int3 again for kprobes */
+       insn_buf[0] = BREAKPOINT_INSTRUCTION;
+
+       tprm->len = RELATIVEJUMP_SIZE;
+       tprm->opcode = insn_buf;
+       tprm->addr = op->kp.addr;
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * Caller must call with locking kprobe_mutex.
+ *
+ * At most MAX_OPTIMIZE_PROBES entries are moved from @oplist to
+ * @done_list per call; any remaining entries stay on @oplist.
+ */
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+                                   struct list_head *done_list)
+{
+       struct optimized_kprobe *op, *next;
+       int queued = 0;
+
+       list_for_each_entry_safe(op, next, oplist, list) {
+               /* Set up the poke parameters for this probe */
+               setup_unoptimize_kprobe(&jump_poke_params[queued],
+                                       jump_poke_bufs[queued].buf, op);
+               list_move(&op->list, done_list);
+               queued++;
+               if (queued >= MAX_OPTIMIZE_PROBES)
+                       break;
+       }
+
+       /*
+        * text_poke_smp doesn't support NMI/MCE code modifying.
+        * However, since kprobes itself also doesn't support NMI/MCE
+        * code probing, it's not a problem.
+        */
+       text_poke_smp_batch(jump_poke_params, queued);
+}
+
+/*
+ * Replace a relative jump with a breakpoint (int3), restoring the saved
+ * original bytes behind it.
+ */
+void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+       u8 patch[RELATIVEJUMP_SIZE];
+
+       /* Saved original bytes follow the first byte ... */
+       memcpy(&patch[1], op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+       /* ... which is set to int3 for kprobes */
+       patch[0] = BREAKPOINT_INSTRUCTION;
+
+       text_poke_smp(op->kp.addr, patch, RELATIVEJUMP_SIZE);
+}
+
+/*
+ * Redirect execution to the copied instructions of an optimized kprobe.
+ *
+ * Returns 1 when @p carries KPROBE_FLAG_OPTIMIZED and regs->ip has been
+ * pointed at the copied instructions in the detour buffer, 0 when @p is
+ * not optimized (nothing is changed in that case).
+ */
+int  __kprobes
+setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+{
+       struct optimized_kprobe *op;
+
+       if (!(p->flags & KPROBE_FLAG_OPTIMIZED))
+               return 0;
+
+       /* This kprobe is really able to run optimized path. */
+       op = container_of(p, struct optimized_kprobe, kp);
+       /* Detour through copied instructions */
+       regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
+       if (!reenter)
+               reset_current_kprobe();
+       preempt_enable_no_resched();
+
+       return 1;
+}
+
+/*
+ * Allocate the code buffers and the parameter array used by the batched
+ * optimize/unoptimize paths, MAX_OPTIMIZE_PROBES entries each.
+ *
+ * Returns 0 on success or -ENOMEM; on failure nothing stays allocated.
+ */
+int __kprobes arch_init_optprobes(void)
+{
+       jump_poke_bufs = kmalloc(MAX_OPTIMIZE_PROBES * sizeof(*jump_poke_bufs),
+                                GFP_KERNEL);
+       if (!jump_poke_bufs)
+               return -ENOMEM;
+
+       jump_poke_params = kmalloc(MAX_OPTIMIZE_PROBES * sizeof(*jump_poke_params),
+                                  GFP_KERNEL);
+       if (jump_poke_params)
+               return 0;
+
+       /* Second allocation failed: undo the first one */
+       kfree(jump_poke_bufs);
+       jump_poke_bufs = NULL;
+       return -ENOMEM;
+}
index 7da647d..e213fc8 100644 (file)
  *             <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
  *             <prasanna@in.ibm.com> added function-return probes.
  * 2005-May    Rusty Lynch <rusty.lynch@intel.com>
- *             Added function return probes functionality
+ *             Added function return probes functionality
  * 2006-Feb    Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added
- *             kprobe-booster and kretprobe-booster for i386.
+ *             kprobe-booster and kretprobe-booster for i386.
  * 2007-Dec    Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
- *             and kretprobe-booster for x86-64
+ *             and kretprobe-booster for x86-64
  * 2007-Dec    Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven
- *             <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
- *             unified x86 kprobes code.
+ *             <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
+ *             unified x86 kprobes code.
  */
-
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
 #include <linux/string.h>
@@ -59,6 +58,8 @@
 #include <asm/insn.h>
 #include <asm/debugreg.h>
 
+#include "kprobes-common.h"
+
 void jprobe_return_end(void);
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
                              doesn't switch kernel stack.*/
        {NULL, NULL}    /* Terminator */
 };
+
 const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
 
 static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
@@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
 }
 
 /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-static void __kprobes synthesize_reljump(void *from, void *to)
+void __kprobes synthesize_reljump(void *from, void *to)
 {
        __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
 }
 
+/* Write a relative call (RELATIVECALL_OPCODE) at address 'from' that calls address 'to'. */
+void __kprobes synthesize_relcall(void *from, void *to)
+{
+       __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
+}
+
 /*
  * Skip the prefixes of the instruction.
  */
@@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
  * Returns non-zero if opcode is boostable.
  * RIP relative instructions are adjusted at copying time in 64 bits mode
  */
-static int __kprobes can_boost(kprobe_opcode_t *opcodes)
+int __kprobes can_boost(kprobe_opcode_t *opcodes)
 {
        kprobe_opcode_t opcode;
        kprobe_opcode_t *orig_opcodes = opcodes;
@@ -207,13 +215,15 @@ retry:
        }
 }
 
-/* Recover the probed instruction at addr for further analysis. */
-static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+static unsigned long
+__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
 {
        struct kprobe *kp;
+
        kp = get_kprobe((void *)addr);
+       /* There is no probe, return original address */
        if (!kp)
-               return -EINVAL;
+               return addr;
 
        /*
         *  Basically, kp->ainsn.insn has an original instruction.
@@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
         */
        memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
        buf[0] = kp->opcode;
-       return 0;
+       return (unsigned long)buf;
+}
+
+/*
+ * Recover the original instruction at addr for further analysis.
+ * The caller must hold kprobe_mutex or have preemption disabled so
+ * that the kprobes referenced here cannot be released meanwhile.
+ */
+unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+{
+       unsigned long insn_addr = __recover_optprobed_insn(buf, addr);
+
+       /* Same address back: nothing recovered yet, try the other helper */
+       if (insn_addr == addr)
+               insn_addr = __recover_probed_insn(buf, addr);
+
+       return insn_addr;
 }
 
 /* Check if paddr is at an instruction boundary */
 static int __kprobes can_probe(unsigned long paddr)
 {
-       int ret;
-       unsigned long addr, offset = 0;
+       unsigned long addr, __addr, offset = 0;
        struct insn insn;
        kprobe_opcode_t buf[MAX_INSN_SIZE];
 
@@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr)
        /* Decode instructions */
        addr = paddr - offset;
        while (addr < paddr) {
-               kernel_insn_init(&insn, (void *)addr);
-               insn_get_opcode(&insn);
-
                /*
                 * Check if the instruction has been modified by another
                 * kprobe, in which case we replace the breakpoint by the
                 * original instruction in our buffer.
+                * Also, jump optimization will change the breakpoint to
+                * relative-jump. Since the relative-jump itself is
+                * normally used, we just go through if there is no kprobe.
                 */
-               if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
-                       ret = recover_probed_instruction(buf, addr);
-                       if (ret)
-                               /*
-                                * Another debugging subsystem might insert
-                                * this breakpoint. In that case, we can't
-                                * recover it.
-                                */
-                               return 0;
-                       kernel_insn_init(&insn, buf);
-               }
+               __addr = recover_probed_instruction(buf, addr);
+               kernel_insn_init(&insn, (void *)__addr);
                insn_get_length(&insn);
+
+               /*
+                * Another debugging subsystem might insert this breakpoint.
+                * In that case, we can't recover it.
+                */
+               if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+                       return 0;
                addr += insn.length;
        }
 
@@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
  * If not, return null.
  * Only applicable to 64-bit x86.
  */
-static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
+int __kprobes __copy_instruction(u8 *dest, u8 *src)
 {
        struct insn insn;
-       int ret;
        kprobe_opcode_t buf[MAX_INSN_SIZE];
 
-       kernel_insn_init(&insn, src);
-       if (recover) {
-               insn_get_opcode(&insn);
-               if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
-                       ret = recover_probed_instruction(buf,
-                                                        (unsigned long)src);
-                       if (ret)
-                               return 0;
-                       kernel_insn_init(&insn, buf);
-               }
-       }
+       kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src));
        insn_get_length(&insn);
+       /* Another subsystem puts a breakpoint, failed to recover */
+       if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+               return 0;
        memcpy(dest, insn.kaddr, insn.length);
 
 #ifdef CONFIG_X86_64
@@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
                 * extension of the original signed 32-bit displacement would
                 * have given.
                 */
-               newdisp = (u8 *) src + (s64) insn.displacement.value -
-                         (u8 *) dest;
+               newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
                BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check.  */
                disp = (u8 *) dest + insn_offset_displacement(&insn);
                *(s32 *) disp = (s32) newdisp;
@@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
 
 static void __kprobes arch_copy_kprobe(struct kprobe *p)
 {
+       /* Copy an instruction with recovering if other optprobe modifies it.*/
+       __copy_instruction(p->ainsn.insn, p->addr);
+
        /*
-        * Copy an instruction without recovering int3, because it will be
-        * put by another subsystem.
+        * __copy_instruction can modify the displacement of the instruction,
+        * but it doesn't affect boostable check.
         */
-       __copy_instruction(p->ainsn.insn, p->addr, 0);
-
-       if (can_boost(p->addr))
+       if (can_boost(p->ainsn.insn))
                p->ainsn.boostable = 0;
        else
                p->ainsn.boostable = -1;
 
-       p->opcode = *p->addr;
+       /* Also, displacement change doesn't affect the first byte */
+       p->opcode = p->ainsn.insn[0];
 }
 
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -442,8 +458,8 @@ static void __kprobes restore_btf(void)
        }
 }
 
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
-                                     struct pt_regs *regs)
+void __kprobes
+arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
        unsigned long *sara = stack_addr(regs);
 
@@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
        *sara = (unsigned long) &kretprobe_trampoline;
 }
 
-#ifdef CONFIG_OPTPROBES
-static int  __kprobes setup_detour_execution(struct kprobe *p,
-                                            struct pt_regs *regs,
-                                            int reenter);
-#else
-#define setup_detour_execution(p, regs, reenter) (0)
-#endif
-
-static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
-                                      struct kprobe_ctlblk *kcb, int reenter)
+static void __kprobes
+setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter)
 {
        if (setup_detour_execution(p, regs, reenter))
                return;
@@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
  * within the handler. We save the original kprobes variables and just single
  * step on the instruction of the new probe without calling any user handlers.
  */
-static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
-                                   struct kprobe_ctlblk *kcb)
+static int __kprobes
+reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
 {
        switch (kcb->kprobe_status) {
        case KPROBE_HIT_SSDONE:
@@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
        return 0;
 }
 
-#ifdef CONFIG_X86_64
-#define SAVE_REGS_STRING               \
-       /* Skip cs, ip, orig_ax. */     \
-       "       subq $24, %rsp\n"       \
-       "       pushq %rdi\n"           \
-       "       pushq %rsi\n"           \
-       "       pushq %rdx\n"           \
-       "       pushq %rcx\n"           \
-       "       pushq %rax\n"           \
-       "       pushq %r8\n"            \
-       "       pushq %r9\n"            \
-       "       pushq %r10\n"           \
-       "       pushq %r11\n"           \
-       "       pushq %rbx\n"           \
-       "       pushq %rbp\n"           \
-       "       pushq %r12\n"           \
-       "       pushq %r13\n"           \
-       "       pushq %r14\n"           \
-       "       pushq %r15\n"
-#define RESTORE_REGS_STRING            \
-       "       popq %r15\n"            \
-       "       popq %r14\n"            \
-       "       popq %r13\n"            \
-       "       popq %r12\n"            \
-       "       popq %rbp\n"            \
-       "       popq %rbx\n"            \
-       "       popq %r11\n"            \
-       "       popq %r10\n"            \
-       "       popq %r9\n"             \
-       "       popq %r8\n"             \
-       "       popq %rax\n"            \
-       "       popq %rcx\n"            \
-       "       popq %rdx\n"            \
-       "       popq %rsi\n"            \
-       "       popq %rdi\n"            \
-       /* Skip orig_ax, ip, cs */      \
-       "       addq $24, %rsp\n"
-#else
-#define SAVE_REGS_STRING               \
-       /* Skip cs, ip, orig_ax and gs. */      \
-       "       subl $16, %esp\n"       \
-       "       pushl %fs\n"            \
-       "       pushl %es\n"            \
-       "       pushl %ds\n"            \
-       "       pushl %eax\n"           \
-       "       pushl %ebp\n"           \
-       "       pushl %edi\n"           \
-       "       pushl %esi\n"           \
-       "       pushl %edx\n"           \
-       "       pushl %ecx\n"           \
-       "       pushl %ebx\n"
-#define RESTORE_REGS_STRING            \
-       "       popl %ebx\n"            \
-       "       popl %ecx\n"            \
-       "       popl %edx\n"            \
-       "       popl %esi\n"            \
-       "       popl %edi\n"            \
-       "       popl %ebp\n"            \
-       "       popl %eax\n"            \
-       /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
-       "       addl $24, %esp\n"
-#endif
-
 /*
  * When a retprobed function returns, this code saves registers and
  * calls trampoline_handler() runs, which calls the kretprobe's handler.
@@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
  * jump instruction after the copied instruction, that jumps to the next
  * instruction after the probepoint.
  */
-static void __kprobes resume_execution(struct kprobe *p,
-               struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+static void __kprobes
+resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
 {
        unsigned long *tos = stack_addr(regs);
        unsigned long copy_ip = (unsigned long)p->ainsn.insn;
@@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 /*
  * Wrapper routine for handling exceptions.
  */
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
-                                      unsigned long val, void *data)
+int __kprobes
+kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data)
 {
        struct die_args *args = data;
        int ret = NOTIFY_DONE;
@@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
        return 0;
 }
 
-
-#ifdef CONFIG_OPTPROBES
-
-/* Insert a call instruction at address 'from', which calls address 'to'.*/
-static void __kprobes synthesize_relcall(void *from, void *to)
-{
-       __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
-}
-
-/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
-static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
-                                         unsigned long val)
-{
-#ifdef CONFIG_X86_64
-       *addr++ = 0x48;
-       *addr++ = 0xbf;
-#else
-       *addr++ = 0xb8;
-#endif
-       *(unsigned long *)addr = val;
-}
-
-static void __used __kprobes kprobes_optinsn_template_holder(void)
-{
-       asm volatile (
-                       ".global optprobe_template_entry\n"
-                       "optprobe_template_entry: \n"
-#ifdef CONFIG_X86_64
-                       /* We don't bother saving the ss register */
-                       "       pushq %rsp\n"
-                       "       pushfq\n"
-                       SAVE_REGS_STRING
-                       "       movq %rsp, %rsi\n"
-                       ".global optprobe_template_val\n"
-                       "optprobe_template_val: \n"
-                       ASM_NOP5
-                       ASM_NOP5
-                       ".global optprobe_template_call\n"
-                       "optprobe_template_call: \n"
-                       ASM_NOP5
-                       /* Move flags to rsp */
-                       "       movq 144(%rsp), %rdx\n"
-                       "       movq %rdx, 152(%rsp)\n"
-                       RESTORE_REGS_STRING
-                       /* Skip flags entry */
-                       "       addq $8, %rsp\n"
-                       "       popfq\n"
-#else /* CONFIG_X86_32 */
-                       "       pushf\n"
-                       SAVE_REGS_STRING
-                       "       movl %esp, %edx\n"
-                       ".global optprobe_template_val\n"
-                       "optprobe_template_val: \n"
-                       ASM_NOP5
-                       ".global optprobe_template_call\n"
-                       "optprobe_template_call: \n"
-                       ASM_NOP5
-                       RESTORE_REGS_STRING
-                       "       addl $4, %esp\n"        /* skip cs */
-                       "       popf\n"
-#endif
-                       ".global optprobe_template_end\n"
-                       "optprobe_template_end: \n");
-}
-
-#define TMPL_MOVE_IDX \
-       ((long)&optprobe_template_val - (long)&optprobe_template_entry)
-#define TMPL_CALL_IDX \
-       ((long)&optprobe_template_call - (long)&optprobe_template_entry)
-#define TMPL_END_IDX \
-       ((long)&optprobe_template_end - (long)&optprobe_template_entry)
-
-#define INT3_SIZE sizeof(kprobe_opcode_t)
-
-/* Optimized kprobe call back function: called from optinsn */
-static void __kprobes optimized_callback(struct optimized_kprobe *op,
-                                        struct pt_regs *regs)
-{
-       struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-       unsigned long flags;
-
-       /* This is possible if op is under delayed unoptimizing */
-       if (kprobe_disabled(&op->kp))
-               return;
-
-       local_irq_save(flags);
-       if (kprobe_running()) {
-               kprobes_inc_nmissed_count(&op->kp);
-       } else {
-               /* Save skipped registers */
-#ifdef CONFIG_X86_64
-               regs->cs = __KERNEL_CS;
-#else
-               regs->cs = __KERNEL_CS | get_kernel_rpl();
-               regs->gs = 0;
-#endif
-               regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
-               regs->orig_ax = ~0UL;
-
-               __this_cpu_write(current_kprobe, &op->kp);
-               kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-               opt_pre_handler(&op->kp, regs);
-               __this_cpu_write(current_kprobe, NULL);
-       }
-       local_irq_restore(flags);
-}
-
-static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
-{
-       int len = 0, ret;
-
-       while (len < RELATIVEJUMP_SIZE) {
-               ret = __copy_instruction(dest + len, src + len, 1);
-               if (!ret || !can_boost(dest + len))
-                       return -EINVAL;
-               len += ret;
-       }
-       /* Check whether the address range is reserved */
-       if (ftrace_text_reserved(src, src + len - 1) ||
-           alternatives_text_reserved(src, src + len - 1) ||
-           jump_label_text_reserved(src, src + len - 1))
-               return -EBUSY;
-
-       return len;
-}
-
-/* Check whether insn is indirect jump */
-static int __kprobes insn_is_indirect_jump(struct insn *insn)
-{
-       return ((insn->opcode.bytes[0] == 0xff &&
-               (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
-               insn->opcode.bytes[0] == 0xea); /* Segment based jump */
-}
-
-/* Check whether insn jumps into specified address range */
-static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
-{
-       unsigned long target = 0;
-
-       switch (insn->opcode.bytes[0]) {
-       case 0xe0:      /* loopne */
-       case 0xe1:      /* loope */
-       case 0xe2:      /* loop */
-       case 0xe3:      /* jcxz */
-       case 0xe9:      /* near relative jump */
-       case 0xeb:      /* short relative jump */
-               break;
-       case 0x0f:
-               if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
-                       break;
-               return 0;
-       default:
-               if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
-                       break;
-               return 0;
-       }
-       target = (unsigned long)insn->next_byte + insn->immediate.value;
-
-       return (start <= target && target <= start + len);
-}
-
-/* Decode whole function to ensure any instructions don't jump into target */
-static int __kprobes can_optimize(unsigned long paddr)
-{
-       int ret;
-       unsigned long addr, size = 0, offset = 0;
-       struct insn insn;
-       kprobe_opcode_t buf[MAX_INSN_SIZE];
-
-       /* Lookup symbol including addr */
-       if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
-               return 0;
-
-       /*
-        * Do not optimize in the entry code due to the unstable
-        * stack handling.
-        */
-       if ((paddr >= (unsigned long )__entry_text_start) &&
-           (paddr <  (unsigned long )__entry_text_end))
-               return 0;
-
-       /* Check there is enough space for a relative jump. */
-       if (size - offset < RELATIVEJUMP_SIZE)
-               return 0;
-
-       /* Decode instructions */
-       addr = paddr - offset;
-       while (addr < paddr - offset + size) { /* Decode until function end */
-               if (search_exception_tables(addr))
-                       /*
-                        * Since some fixup code will jumps into this function,
-                        * we can't optimize kprobe in this function.
-                        */
-                       return 0;
-               kernel_insn_init(&insn, (void *)addr);
-               insn_get_opcode(&insn);
-               if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
-                       ret = recover_probed_instruction(buf, addr);
-                       if (ret)
-                               return 0;
-                       kernel_insn_init(&insn, buf);
-               }
-               insn_get_length(&insn);
-               /* Recover address */
-               insn.kaddr = (void *)addr;
-               insn.next_byte = (void *)(addr + insn.length);
-               /* Check any instructions don't jump into target */
-               if (insn_is_indirect_jump(&insn) ||
-                   insn_jump_into_range(&insn, paddr + INT3_SIZE,
-                                        RELATIVE_ADDR_SIZE))
-                       return 0;
-               addr += insn.length;
-       }
-
-       return 1;
-}
-
-/* Check optimized_kprobe can actually be optimized. */
-int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
-{
-       int i;
-       struct kprobe *p;
-
-       for (i = 1; i < op->optinsn.size; i++) {
-               p = get_kprobe(op->kp.addr + i);
-               if (p && !kprobe_disabled(p))
-                       return -EEXIST;
-       }
-
-       return 0;
-}
-
-/* Check the addr is within the optimized instructions. */
-int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
-                                          unsigned long addr)
-{
-       return ((unsigned long)op->kp.addr <= addr &&
-               (unsigned long)op->kp.addr + op->optinsn.size > addr);
-}
-
-/* Free optimized instruction slot */
-static __kprobes
-void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
-{
-       if (op->optinsn.insn) {
-               free_optinsn_slot(op->optinsn.insn, dirty);
-               op->optinsn.insn = NULL;
-               op->optinsn.size = 0;
-       }
-}
-
-void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
-{
-       __arch_remove_optimized_kprobe(op, 1);
-}
-
-/*
- * Copy replacing target instructions
- * Target instructions MUST be relocatable (checked inside)
- */
-int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
-{
-       u8 *buf;
-       int ret;
-       long rel;
-
-       if (!can_optimize((unsigned long)op->kp.addr))
-               return -EILSEQ;
-
-       op->optinsn.insn = get_optinsn_slot();
-       if (!op->optinsn.insn)
-               return -ENOMEM;
-
-       /*
-        * Verify if the address gap is in 2GB range, because this uses
-        * a relative jump.
-        */
-       rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
-       if (abs(rel) > 0x7fffffff)
-               return -ERANGE;
-
-       buf = (u8 *)op->optinsn.insn;
-
-       /* Copy instructions into the out-of-line buffer */
-       ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
-       if (ret < 0) {
-               __arch_remove_optimized_kprobe(op, 0);
-               return ret;
-       }
-       op->optinsn.size = ret;
-
-       /* Copy arch-dep-instance from template */
-       memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
-
-       /* Set probe information */
-       synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
-
-       /* Set probe function call */
-       synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
-
-       /* Set returning jmp instruction at the tail of out-of-line buffer */
-       synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
-                          (u8 *)op->kp.addr + op->optinsn.size);
-
-       flush_icache_range((unsigned long) buf,
-                          (unsigned long) buf + TMPL_END_IDX +
-                          op->optinsn.size + RELATIVEJUMP_SIZE);
-       return 0;
-}
-
-#define MAX_OPTIMIZE_PROBES 256
-static struct text_poke_param *jump_poke_params;
-static struct jump_poke_buffer {
-       u8 buf[RELATIVEJUMP_SIZE];
-} *jump_poke_bufs;
-
-static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
-                                           u8 *insn_buf,
-                                           struct optimized_kprobe *op)
-{
-       s32 rel = (s32)((long)op->optinsn.insn -
-                       ((long)op->kp.addr + RELATIVEJUMP_SIZE));
-
-       /* Backup instructions which will be replaced by jump address */
-       memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
-              RELATIVE_ADDR_SIZE);
-
-       insn_buf[0] = RELATIVEJUMP_OPCODE;
-       *(s32 *)(&insn_buf[1]) = rel;
-
-       tprm->addr = op->kp.addr;
-       tprm->opcode = insn_buf;
-       tprm->len = RELATIVEJUMP_SIZE;
-}
-
-/*
- * Replace breakpoints (int3) with relative jumps.
- * Caller must call with locking kprobe_mutex and text_mutex.
- */
-void __kprobes arch_optimize_kprobes(struct list_head *oplist)
-{
-       struct optimized_kprobe *op, *tmp;
-       int c = 0;
-
-       list_for_each_entry_safe(op, tmp, oplist, list) {
-               WARN_ON(kprobe_disabled(&op->kp));
-               /* Setup param */
-               setup_optimize_kprobe(&jump_poke_params[c],
-                                     jump_poke_bufs[c].buf, op);
-               list_del_init(&op->list);
-               if (++c >= MAX_OPTIMIZE_PROBES)
-                       break;
-       }
-
-       /*
-        * text_poke_smp doesn't support NMI/MCE code modifying.
-        * However, since kprobes itself also doesn't support NMI/MCE
-        * code probing, it's not a problem.
-        */
-       text_poke_smp_batch(jump_poke_params, c);
-}
-
-static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
-                                             u8 *insn_buf,
-                                             struct optimized_kprobe *op)
-{
-       /* Set int3 to first byte for kprobes */
-       insn_buf[0] = BREAKPOINT_INSTRUCTION;
-       memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-
-       tprm->addr = op->kp.addr;
-       tprm->opcode = insn_buf;
-       tprm->len = RELATIVEJUMP_SIZE;
-}
-
-/*
- * Recover original instructions and breakpoints from relative jumps.
- * Caller must call with locking kprobe_mutex.
- */
-extern void arch_unoptimize_kprobes(struct list_head *oplist,
-                                   struct list_head *done_list)
-{
-       struct optimized_kprobe *op, *tmp;
-       int c = 0;
-
-       list_for_each_entry_safe(op, tmp, oplist, list) {
-               /* Setup param */
-               setup_unoptimize_kprobe(&jump_poke_params[c],
-                                       jump_poke_bufs[c].buf, op);
-               list_move(&op->list, done_list);
-               if (++c >= MAX_OPTIMIZE_PROBES)
-                       break;
-       }
-
-       /*
-        * text_poke_smp doesn't support NMI/MCE code modifying.
-        * However, since kprobes itself also doesn't support NMI/MCE
-        * code probing, it's not a problem.
-        */
-       text_poke_smp_batch(jump_poke_params, c);
-}
-
-/* Replace a relative jump with a breakpoint (int3).  */
-void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
-{
-       u8 buf[RELATIVEJUMP_SIZE];
-
-       /* Set int3 to first byte for kprobes */
-       buf[0] = BREAKPOINT_INSTRUCTION;
-       memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-       text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
-}
-
-static int  __kprobes setup_detour_execution(struct kprobe *p,
-                                            struct pt_regs *regs,
-                                            int reenter)
-{
-       struct optimized_kprobe *op;
-
-       if (p->flags & KPROBE_FLAG_OPTIMIZED) {
-               /* This kprobe is really able to run optimized path. */
-               op = container_of(p, struct optimized_kprobe, kp);
-               /* Detour through copied instructions */
-               regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
-               if (!reenter)
-                       reset_current_kprobe();
-               preempt_enable_no_resched();
-               return 1;
-       }
-       return 0;
-}
-
-static int __kprobes init_poke_params(void)
-{
-       /* Allocate code buffer and parameter array */
-       jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
-                                MAX_OPTIMIZE_PROBES, GFP_KERNEL);
-       if (!jump_poke_bufs)
-               return -ENOMEM;
-
-       jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
-                                  MAX_OPTIMIZE_PROBES, GFP_KERNEL);
-       if (!jump_poke_params) {
-               kfree(jump_poke_bufs);
-               jump_poke_bufs = NULL;
-               return -ENOMEM;
-       }
-
-       return 0;
-}
-#else  /* !CONFIG_OPTPROBES */
-static int __kprobes init_poke_params(void)
-{
-       return 0;
-}
-#endif
-
 int __init arch_init_kprobes(void)
 {
-       return init_poke_params();
+       return arch_init_optprobes();
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
index f0c6fd6..694d801 100644 (file)
@@ -438,9 +438,9 @@ void __init kvm_guest_init(void)
 static __init int activate_jump_labels(void)
 {
        if (has_steal_clock) {
-               jump_label_inc(&paravirt_steal_enabled);
+               static_key_slow_inc(&paravirt_steal_enabled);
                if (steal_acc)
-                       jump_label_inc(&paravirt_steal_rq_enabled);
+                       static_key_slow_inc(&paravirt_steal_rq_enabled);
        }
 
        return 0;
index d90272e..ada2f99 100644 (file)
@@ -202,8 +202,8 @@ static void native_flush_tlb_single(unsigned long addr)
        __native_flush_tlb_single(addr);
 }
 
-struct jump_label_key paravirt_steal_enabled;
-struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
 
 static u64 native_steal_clock(int cpu)
 {
index 15763af..44eefde 100644 (file)
@@ -377,8 +377,8 @@ static inline int hlt_use_halt(void)
 void default_idle(void)
 {
        if (hlt_use_halt()) {
-               trace_power_start(POWER_CSTATE, 1, smp_processor_id());
-               trace_cpu_idle(1, smp_processor_id());
+               trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
+               trace_cpu_idle_rcuidle(1, smp_processor_id());
                current_thread_info()->status &= ~TS_POLLING;
                /*
                 * TS_POLLING-cleared state must be visible before we
@@ -391,8 +391,8 @@ void default_idle(void)
                else
                        local_irq_enable();
                current_thread_info()->status |= TS_POLLING;
-               trace_power_end(smp_processor_id());
-               trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+               trace_power_end_rcuidle(smp_processor_id());
+               trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
        } else {
                local_irq_enable();
                /* loop is done by the caller */
@@ -450,8 +450,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
 static void mwait_idle(void)
 {
        if (!need_resched()) {
-               trace_power_start(POWER_CSTATE, 1, smp_processor_id());
-               trace_cpu_idle(1, smp_processor_id());
+               trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
+               trace_cpu_idle_rcuidle(1, smp_processor_id());
                if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
                        clflush((void *)&current_thread_info()->flags);
 
@@ -461,8 +461,8 @@ static void mwait_idle(void)
                        __sti_mwait(0, 0);
                else
                        local_irq_enable();
-               trace_power_end(smp_processor_id());
-               trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+               trace_power_end_rcuidle(smp_processor_id());
+               trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
        } else
                local_irq_enable();
 }
@@ -474,13 +474,13 @@ static void mwait_idle(void)
  */
 static void poll_idle(void)
 {
-       trace_power_start(POWER_CSTATE, 0, smp_processor_id());
-       trace_cpu_idle(0, smp_processor_id());
+       trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id());
+       trace_cpu_idle_rcuidle(0, smp_processor_id());
        local_irq_enable();
        while (!need_resched())
                cpu_relax();
-       trace_power_end(smp_processor_id());
-       trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+       trace_power_end_rcuidle(smp_processor_id());
+       trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
 /*
index c08d1ff..49888fe 100644 (file)
@@ -119,9 +119,7 @@ void cpu_idle(void)
                }
                rcu_idle_exit();
                tick_nohz_idle_exit();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index cfa5c90..e34257c 100644 (file)
@@ -156,9 +156,7 @@ void cpu_idle(void)
                }
 
                tick_nohz_idle_exit();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 66d250c..58f7816 100644 (file)
@@ -291,19 +291,6 @@ notrace static void __cpuinit start_secondary(void *unused)
        per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
        x86_platform.nmi_init();
 
-       /*
-        * Wait until the cpu which brought this one up marked it
-        * online before enabling interrupts. If we don't do that then
-        * we can end up waking up the softirq thread before this cpu
-        * reached the active state, which makes the scheduler unhappy
-        * and schedule the softirq thread on the wrong cpu. This is
-        * only observable with forced threaded interrupts, but in
-        * theory it could also happen w/o them. It's just way harder
-        * to achieve.
-        */
-       while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
-               cpu_relax();
-
        /* enable local interrupts */
        local_irq_enable();
 
index dd5fbf4..c6eba2b 100644 (file)
@@ -57,9 +57,6 @@ EXPORT_SYMBOL(profile_pc);
  */
 static irqreturn_t timer_interrupt(int irq, void *dev_id)
 {
-       /* Keep nmi watchdog up to date */
-       inc_irq_stat(irq0_irqs);
-
        global_clock_event->event_handler(global_clock_event);
 
        /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
index a62c201..183c592 100644 (file)
@@ -620,7 +620,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 
        if (cpu_khz) {
                *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
-               *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR);
+               *offset = ns_now - mult_frac(tsc_now, *scale,
+                                            (1UL << CYC2NS_SCALE_FACTOR));
        }
 
        sched_clock_idle_wakeup_event(0);
index 9eba29b..fc25e60 100644 (file)
@@ -42,7 +42,7 @@ static __cpuinitdata int nr_warps;
 /*
  * TSC-warp measurement loop running on both CPUs:
  */
-static __cpuinit void check_tsc_warp(void)
+static __cpuinit void check_tsc_warp(unsigned int timeout)
 {
        cycles_t start, now, prev, end;
        int i;
@@ -51,9 +51,9 @@ static __cpuinit void check_tsc_warp(void)
        start = get_cycles();
        rdtsc_barrier();
        /*
-        * The measurement runs for 20 msecs:
+        * The measurement runs for 'timeout' msecs:
         */
-       end = start + tsc_khz * 20ULL;
+       end = start + (cycles_t) tsc_khz * timeout;
        now = start;
 
        for (i = 0; ; i++) {
@@ -99,6 +99,25 @@ static __cpuinit void check_tsc_warp(void)
 }
 
 /*
+ * If the target CPU coming online doesn't have any of its core-siblings
+ * online, a timeout of 20msec will be used for the TSC-warp measurement
+ * loop. Otherwise a smaller timeout of 2msec will be used, as we have some
+ * information about this socket already (and this information grows as we
+ * have more and more logical-siblings in that socket).
+ *
+ * Ideally we should be able to skip the TSC sync check on the other
+ * core-siblings, if the first logical CPU in a socket passed the sync test.
+ * But as the TSC is per-logical CPU and can potentially be modified wrongly
+ * by the BIOS, a TSC sync test of shorter duration should still be able
+ * to catch such errors. This will also catch the condition where the
+ * cores in the socket don't all get reset at the same time.
+ */
+static inline unsigned int loop_timeout(int cpu)
+{
+       return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20;
+}
+
+/*
  * Source CPU calls into this - it waits for the freshly booted
  * target CPU to arrive and then starts the measurement:
  */
@@ -135,7 +154,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
         */
        atomic_inc(&start_count);
 
-       check_tsc_warp();
+       check_tsc_warp(loop_timeout(cpu));
 
        while (atomic_read(&stop_count) != cpus-1)
                cpu_relax();
@@ -183,7 +202,7 @@ void __cpuinit check_tsc_sync_target(void)
        while (atomic_read(&start_count) != cpus)
                cpu_relax();
 
-       check_tsc_warp();
+       check_tsc_warp(loop_timeout(smp_processor_id()));
 
        /*
         * Ok, we are done:
index fe15dcc..ea7b4fd 100644 (file)
@@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
 }
 
 static bool mmu_audit;
-static struct jump_label_key mmu_audit_key;
+static struct static_key mmu_audit_key;
 
 static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
 {
@@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
 
 static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
 {
-       if (static_branch((&mmu_audit_key)))
+       if (static_key_false((&mmu_audit_key)))
                __kvm_mmu_audit(vcpu, point);
 }
 
@@ -259,7 +259,7 @@ static void mmu_audit_enable(void)
        if (mmu_audit)
                return;
 
-       jump_label_inc(&mmu_audit_key);
+       static_key_slow_inc(&mmu_audit_key);
        mmu_audit = true;
 }
 
@@ -268,7 +268,7 @@ static void mmu_audit_disable(void)
        if (!mmu_audit)
                return;
 
-       jump_label_dec(&mmu_audit_key);
+       static_key_slow_dec(&mmu_audit_key);
        mmu_audit = false;
 }
 
index fc45ba8..e395693 100644 (file)
@@ -48,9 +48,9 @@ static void delay_loop(unsigned long loops)
 }
 
 /* TSC based delay: */
-static void delay_tsc(unsigned long loops)
+static void delay_tsc(unsigned long __loops)
 {
-       unsigned long bclock, now;
+       u32 bclock, now, loops = __loops;
        int cpu;
 
        preempt_disable();
index 88ad5fb..c1f01a8 100644 (file)
@@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
        return inat_primary_table[opcode];
 }
 
-insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx,
+int inat_get_last_prefix_id(insn_byte_t last_pfx)
+{
+       insn_attr_t lpfx_attr;
+
+       lpfx_attr = inat_get_opcode_attribute(last_pfx);
+       return inat_last_prefix_id(lpfx_attr);
+}
+
+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
                                      insn_attr_t esc_attr)
 {
        const insn_attr_t *table;
-       insn_attr_t lpfx_attr;
-       int n, m = 0;
+       int n;
 
        n = inat_escape_id(esc_attr);
-       if (last_pfx) {
-               lpfx_attr = inat_get_opcode_attribute(last_pfx);
-               m = inat_last_prefix_id(lpfx_attr);
-       }
+
        table = inat_escape_tables[n][0];
        if (!table)
                return 0;
-       if (inat_has_variant(table[opcode]) && m) {
-               table = inat_escape_tables[n][m];
+       if (inat_has_variant(table[opcode]) && lpfx_id) {
+               table = inat_escape_tables[n][lpfx_id];
                if (!table)
                        return 0;
        }
        return table[opcode];
 }
 
-insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx,
+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
                                     insn_attr_t grp_attr)
 {
        const insn_attr_t *table;
-       insn_attr_t lpfx_attr;
-       int n, m = 0;
+       int n;
 
        n = inat_group_id(grp_attr);
-       if (last_pfx) {
-               lpfx_attr = inat_get_opcode_attribute(last_pfx);
-               m = inat_last_prefix_id(lpfx_attr);
-       }
+
        table = inat_group_tables[n][0];
        if (!table)
                return inat_group_common_attribute(grp_attr);
-       if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) {
-               table = inat_group_tables[n][m];
+       if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
+               table = inat_group_tables[n][lpfx_id];
                if (!table)
                        return inat_group_common_attribute(grp_attr);
        }
index 5a1f9f3..25feb1a 100644 (file)
@@ -185,7 +185,8 @@ err_out:
 void insn_get_opcode(struct insn *insn)
 {
        struct insn_field *opcode = &insn->opcode;
-       insn_byte_t op, pfx;
+       insn_byte_t op;
+       int pfx_id;
        if (opcode->got)
                return;
        if (!insn->prefixes.got)
@@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn)
                /* Get escaped opcode */
                op = get_next(insn_byte_t, insn);
                opcode->bytes[opcode->nbytes++] = op;
-               pfx = insn_last_prefix(insn);
-               insn->attr = inat_get_escape_attribute(op, pfx, insn->attr);
+               pfx_id = insn_last_prefix_id(insn);
+               insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
        }
        if (inat_must_vex(insn->attr))
                insn->attr = 0; /* This instruction is bad */
@@ -235,7 +236,7 @@ err_out:
 void insn_get_modrm(struct insn *insn)
 {
        struct insn_field *modrm = &insn->modrm;
-       insn_byte_t pfx, mod;
+       insn_byte_t pfx_id, mod;
        if (modrm->got)
                return;
        if (!insn->opcode.got)
@@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn)
                modrm->value = mod;
                modrm->nbytes = 1;
                if (inat_is_group(insn->attr)) {
-                       pfx = insn_last_prefix(insn);
-                       insn->attr = inat_get_group_attribute(mod, pfx,
+                       pfx_id = insn_last_prefix_id(insn);
+                       insn->attr = inat_get_group_attribute(mod, pfx_id,
                                                              insn->attr);
                        if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
                                insn->attr = 0; /* This is bad */
index f581a18..8ecbb4b 100644 (file)
@@ -333,13 +333,15 @@ try_again:
                 * Lookup failure means no vma is above this address,
                 * i.e. return with success:
                 */
-               if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
+               vma = find_vma(mm, addr);
+               if (!vma)
                        return addr;
 
                /*
                 * new region fits between prev_vma->vm_end and
                 * vma->vm_start, use it:
                 */
+               prev_vma = vma->vm_prev;
                if (addr + len <= vma->vm_start &&
                            (!prev_vma || (addr >= prev_vma->vm_end))) {
                        /* remember the address as a hint for next time */
index a312e76..49a5cb5 100644 (file)
@@ -60,6 +60,16 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
                        DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
                },
        },
+       /* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */
+       {
+               .callback = set_use_crs,
+               .ident = "MSI MS-7253",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
+                       DMI_MATCH(DMI_BOARD_NAME, "MS-7253"),
+                       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
+               },
+       },
 
        /* Now for the blacklist.. */
 
@@ -282,9 +292,6 @@ static void add_resources(struct pci_root_info *info)
        int i;
        struct resource *res, *root, *conflict;
 
-       if (!pci_use_crs)
-               return;
-
        coalesce_windows(info, IORESOURCE_MEM);
        coalesce_windows(info, IORESOURCE_IO);
 
@@ -336,8 +343,13 @@ get_current_resources(struct acpi_device *device, int busnum,
        acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
                                &info);
 
-       add_resources(&info);
-       return;
+       if (pci_use_crs) {
+               add_resources(&info);
+
+               return;
+       }
+
+       kfree(info.name);
 
 name_alloc_fail:
        kfree(info.res);
index 47041e7..2c90047 100644 (file)
@@ -113,9 +113,7 @@ void cpu_idle(void)
        while (1) {
                while (!need_resched())
                        platform_idle();
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
        }
 }
 
index 8b782a6..fb95dd2 100644 (file)
@@ -36,11 +36,23 @@ static void icq_free_icq_rcu(struct rcu_head *head)
        kmem_cache_free(icq->__rcu_icq_cache, icq);
 }
 
-/*
- * Exit and free an icq.  Called with both ioc and q locked.
- */
+/* Exit an icq. Called with both ioc and q locked. */
 static void ioc_exit_icq(struct io_cq *icq)
 {
+       struct elevator_type *et = icq->q->elevator->type;
+
+       if (icq->flags & ICQ_EXITED)
+               return;
+
+       if (et->ops.elevator_exit_icq_fn)
+               et->ops.elevator_exit_icq_fn(icq);
+
+       icq->flags |= ICQ_EXITED;
+}
+
+/* Release an icq.  Called with both ioc and q locked. */
+static void ioc_destroy_icq(struct io_cq *icq)
+{
        struct io_context *ioc = icq->ioc;
        struct request_queue *q = icq->q;
        struct elevator_type *et = q->elevator->type;
@@ -60,8 +72,7 @@ static void ioc_exit_icq(struct io_cq *icq)
        if (rcu_dereference_raw(ioc->icq_hint) == icq)
                rcu_assign_pointer(ioc->icq_hint, NULL);
 
-       if (et->ops.elevator_exit_icq_fn)
-               et->ops.elevator_exit_icq_fn(icq);
+       ioc_exit_icq(icq);
 
        /*
         * @icq->q might have gone away by the time RCU callback runs
@@ -79,7 +90,6 @@ static void ioc_release_fn(struct work_struct *work)
 {
        struct io_context *ioc = container_of(work, struct io_context,
                                              release_work);
-       struct request_queue *last_q = NULL;
        unsigned long flags;
 
        /*
@@ -93,44 +103,19 @@ static void ioc_release_fn(struct work_struct *work)
        while (!hlist_empty(&ioc->icq_list)) {
                struct io_cq *icq = hlist_entry(ioc->icq_list.first,
                                                struct io_cq, ioc_node);
-               struct request_queue *this_q = icq->q;
-
-               if (this_q != last_q) {
-                       /*
-                        * Need to switch to @this_q.  Once we release
-                        * @ioc->lock, it can go away along with @cic.
-                        * Hold on to it.
-                        */
-                       __blk_get_queue(this_q);
-
-                       /*
-                        * blk_put_queue() might sleep thanks to kobject
-                        * idiocy.  Always release both locks, put and
-                        * restart.
-                        */
-                       if (last_q) {
-                               spin_unlock(last_q->queue_lock);
-                               spin_unlock_irqrestore(&ioc->lock, flags);
-                               blk_put_queue(last_q);
-                       } else {
-                               spin_unlock_irqrestore(&ioc->lock, flags);
-                       }
-
-                       last_q = this_q;
-                       spin_lock_irqsave(this_q->queue_lock, flags);
-                       spin_lock_nested(&ioc->lock, 1);
-                       continue;
+               struct request_queue *q = icq->q;
+
+               if (spin_trylock(q->queue_lock)) {
+                       ioc_destroy_icq(icq);
+                       spin_unlock(q->queue_lock);
+               } else {
+                       spin_unlock_irqrestore(&ioc->lock, flags);
+                       cpu_relax();
+                       spin_lock_irqsave_nested(&ioc->lock, flags, 1);
                }
-               ioc_exit_icq(icq);
        }
 
-       if (last_q) {
-               spin_unlock(last_q->queue_lock);
-               spin_unlock_irqrestore(&ioc->lock, flags);
-               blk_put_queue(last_q);
-       } else {
-               spin_unlock_irqrestore(&ioc->lock, flags);
-       }
+       spin_unlock_irqrestore(&ioc->lock, flags);
 
        kmem_cache_free(iocontext_cachep, ioc);
 }
@@ -145,6 +130,7 @@ static void ioc_release_fn(struct work_struct *work)
 void put_io_context(struct io_context *ioc)
 {
        unsigned long flags;
+       bool free_ioc = false;
 
        if (ioc == NULL)
                return;
@@ -159,8 +145,13 @@ void put_io_context(struct io_context *ioc)
                spin_lock_irqsave(&ioc->lock, flags);
                if (!hlist_empty(&ioc->icq_list))
                        schedule_work(&ioc->release_work);
+               else
+                       free_ioc = true;
                spin_unlock_irqrestore(&ioc->lock, flags);
        }
+
+       if (free_ioc)
+               kmem_cache_free(iocontext_cachep, ioc);
 }
 EXPORT_SYMBOL(put_io_context);
 
@@ -168,13 +159,41 @@ EXPORT_SYMBOL(put_io_context);
 void exit_io_context(struct task_struct *task)
 {
        struct io_context *ioc;
+       struct io_cq *icq;
+       struct hlist_node *n;
+       unsigned long flags;
 
        task_lock(task);
        ioc = task->io_context;
        task->io_context = NULL;
        task_unlock(task);
 
-       atomic_dec(&ioc->nr_tasks);
+       if (!atomic_dec_and_test(&ioc->nr_tasks)) {
+               put_io_context(ioc);
+               return;
+       }
+
+       /*
+        * Need ioc lock to walk icq_list and q lock to exit icq.  Perform
+        * reverse double locking.  Read comment in ioc_release_fn() for
+        * explanation on the nested locking annotation.
+        */
+retry:
+       spin_lock_irqsave_nested(&ioc->lock, flags, 1);
+       hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node) {
+               if (icq->flags & ICQ_EXITED)
+                       continue;
+               if (spin_trylock(icq->q->queue_lock)) {
+                       ioc_exit_icq(icq);
+                       spin_unlock(icq->q->queue_lock);
+               } else {
+                       spin_unlock_irqrestore(&ioc->lock, flags);
+                       cpu_relax();
+                       goto retry;
+               }
+       }
+       spin_unlock_irqrestore(&ioc->lock, flags);
+
        put_io_context(ioc);
 }
 
@@ -194,7 +213,7 @@ void ioc_clear_queue(struct request_queue *q)
                struct io_context *ioc = icq->ioc;
 
                spin_lock(&ioc->lock);
-               ioc_exit_icq(icq);
+               ioc_destroy_icq(icq);
                spin_unlock(&ioc->lock);
        }
 }
@@ -363,13 +382,13 @@ struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
        return icq;
 }
 
-void ioc_set_changed(struct io_context *ioc, int which)
+void ioc_set_icq_flags(struct io_context *ioc, unsigned int flags)
 {
        struct io_cq *icq;
        struct hlist_node *n;
 
        hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node)
-               set_bit(which, &icq->changed);
+               icq->flags |= flags;
 }
 
 /**
@@ -387,7 +406,7 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
 
        spin_lock_irqsave(&ioc->lock, flags);
        ioc->ioprio = ioprio;
-       ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED);
+       ioc_set_icq_flags(ioc, ICQ_IOPRIO_CHANGED);
        spin_unlock_irqrestore(&ioc->lock, flags);
 }
 
@@ -404,11 +423,33 @@ void ioc_cgroup_changed(struct io_context *ioc)
        unsigned long flags;
 
        spin_lock_irqsave(&ioc->lock, flags);
-       ioc_set_changed(ioc, ICQ_CGROUP_CHANGED);
+       ioc_set_icq_flags(ioc, ICQ_CGROUP_CHANGED);
        spin_unlock_irqrestore(&ioc->lock, flags);
 }
 EXPORT_SYMBOL(ioc_cgroup_changed);
 
+/**
+ * icq_get_changed - fetch and clear icq changed mask
+ * @icq: icq of interest
+ *
+ * Fetch and clear ICQ_*_CHANGED bits from @icq.  Grabs and releases
+ * @icq->ioc->lock.
+ */
+unsigned icq_get_changed(struct io_cq *icq)
+{
+       unsigned int changed = 0;
+       unsigned long flags;
+
+       if (unlikely(icq->flags & ICQ_CHANGED_MASK)) {
+               spin_lock_irqsave(&icq->ioc->lock, flags);
+               changed = icq->flags & ICQ_CHANGED_MASK;
+               icq->flags &= ~ICQ_CHANGED_MASK;
+               spin_unlock_irqrestore(&icq->ioc->lock, flags);
+       }
+       return changed;
+}
+EXPORT_SYMBOL(icq_get_changed);
+
 static int __init blk_ioc_init(void)
 {
        iocontext_cachep = kmem_cache_create("blkdev_ioc",
index 1366a89..467c8de 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/blkdev.h>
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
+#include <linux/sched.h>
 
 #include "blk.h"
 
@@ -103,9 +104,10 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
 
 void __blk_complete_request(struct request *req)
 {
-       int ccpu, cpu, group_cpu = NR_CPUS;
+       int ccpu, cpu;
        struct request_queue *q = req->q;
        unsigned long flags;
+       bool shared = false;
 
        BUG_ON(!q->softirq_done_fn);
 
@@ -117,22 +119,20 @@ void __blk_complete_request(struct request *req)
         */
        if (req->cpu != -1) {
                ccpu = req->cpu;
-               if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
-                       ccpu = blk_cpu_to_group(ccpu);
-                       group_cpu = blk_cpu_to_group(cpu);
-               }
+               if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
+                       shared = cpus_share_cache(cpu, ccpu);
        } else
                ccpu = cpu;
 
        /*
-        * If current CPU and requested CPU are in the same group, running
-        * softirq in current CPU. One might concern this is just like
+        * If current CPU and requested CPU share a cache, run the softirq on
+        * the current CPU. One might concern this is just like
         * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
         * running in interrupt handler, and currently I/O controller doesn't
         * support multiple interrupts, so current CPU is unique actually. This
         * avoids IPI sending from current CPU to the first CPU of a group.
         */
-       if (ccpu == cpu || ccpu == group_cpu) {
+       if (ccpu == cpu || shared) {
                struct list_head *list;
 do_local:
                list = &__get_cpu_var(blk_cpu_done);
index 9c12f80..d45be87 100644 (file)
@@ -166,22 +166,6 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
        return q->nr_congestion_off;
 }
 
-static inline int blk_cpu_to_group(int cpu)
-{
-       int group = NR_CPUS;
-#ifdef CONFIG_SCHED_MC
-       const struct cpumask *mask = cpu_coregroup_mask(cpu);
-       group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_SMT)
-       group = cpumask_first(topology_thread_cpumask(cpu));
-#else
-       return cpu;
-#endif
-       if (likely(group < NR_CPUS))
-               return group;
-       return cpu;
-}
-
 /*
  * Contribute to IO statistics IFF:
  *
index d0ba505..4572952 100644 (file)
@@ -3470,20 +3470,20 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
        const int rw = rq_data_dir(rq);
        const bool is_sync = rq_is_sync(rq);
        struct cfq_queue *cfqq;
+       unsigned int changed;
 
        might_sleep_if(gfp_mask & __GFP_WAIT);
 
        spin_lock_irq(q->queue_lock);
 
        /* handle changed notifications */
-       if (unlikely(cic->icq.changed)) {
-               if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
-                       changed_ioprio(cic);
+       changed = icq_get_changed(&cic->icq);
+       if (unlikely(changed & ICQ_IOPRIO_CHANGED))
+               changed_ioprio(cic);
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-               if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
-                       changed_cgroup(cic);
+       if (unlikely(changed & ICQ_CGROUP_CHANGED))
+               changed_cgroup(cic);
 #endif
-       }
 
 new_queue:
        cfqq = cic_to_cfqq(cic, is_sync);
index 23b4f70..df9816e 100644 (file)
@@ -35,6 +35,7 @@ static DEFINE_IDR(ext_devt_idr);
 
 static struct device_type disk_type;
 
+static void disk_alloc_events(struct gendisk *disk);
 static void disk_add_events(struct gendisk *disk);
 static void disk_del_events(struct gendisk *disk);
 static void disk_release_events(struct gendisk *disk);
@@ -601,6 +602,8 @@ void add_disk(struct gendisk *disk)
        disk->major = MAJOR(devt);
        disk->first_minor = MINOR(devt);
 
+       disk_alloc_events(disk);
+
        /* Register BDI before referencing it from bdev */
        bdi = &disk->queue->backing_dev_info;
        bdi_register_dev(bdi, disk_devt(disk));
@@ -1475,9 +1478,9 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now)
        intv = disk_events_poll_jiffies(disk);
        set_timer_slack(&ev->dwork.timer, intv / 4);
        if (check_now)
-               queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
+               queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
        else if (intv)
-               queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
+               queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv);
 out_unlock:
        spin_unlock_irqrestore(&ev->lock, flags);
 }
@@ -1521,7 +1524,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
        ev->clearing |= mask;
        if (!ev->block) {
                cancel_delayed_work(&ev->dwork);
-               queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
+               queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
        }
        spin_unlock_irq(&ev->lock);
 }
@@ -1558,7 +1561,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
 
        /* uncondtionally schedule event check and wait for it to finish */
        disk_block_events(disk);
-       queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
+       queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
        flush_delayed_work(&ev->dwork);
        __disk_unblock_events(disk, false);
 
@@ -1595,7 +1598,7 @@ static void disk_events_workfn(struct work_struct *work)
 
        intv = disk_events_poll_jiffies(disk);
        if (!ev->block && intv)
-               queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
+               queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv);
 
        spin_unlock_irq(&ev->lock);
 
@@ -1733,9 +1736,9 @@ module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
                &disk_events_dfl_poll_msecs, 0644);
 
 /*
- * disk_{add|del|release}_events - initialize and destroy disk_events.
+ * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
  */
-static void disk_add_events(struct gendisk *disk)
+static void disk_alloc_events(struct gendisk *disk)
 {
        struct disk_events *ev;
 
@@ -1748,16 +1751,6 @@ static void disk_add_events(struct gendisk *disk)
                return;
        }
 
-       if (sysfs_create_files(&disk_to_dev(disk)->kobj,
-                              disk_events_attrs) < 0) {
-               pr_warn("%s: failed to create sysfs files for events\n",
-                       disk->disk_name);
-               kfree(ev);
-               return;
-       }
-
-       disk->ev = ev;
-
        INIT_LIST_HEAD(&ev->node);
        ev->disk = disk;
        spin_lock_init(&ev->lock);
@@ -1766,8 +1759,21 @@ static void disk_add_events(struct gendisk *disk)
        ev->poll_msecs = -1;
        INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
 
+       disk->ev = ev;
+}
+
+static void disk_add_events(struct gendisk *disk)
+{
+       if (!disk->ev)
+               return;
+
+       /* FIXME: error handling */
+       if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0)
+               pr_warn("%s: failed to create sysfs files for events\n",
+                       disk->disk_name);
+
        mutex_lock(&disk_events_mutex);
-       list_add_tail(&ev->node, &disk_events);
+       list_add_tail(&disk->ev->node, &disk_events);
        mutex_unlock(&disk_events_mutex);
 
        /*
index d06ec1c..6df5d69 100644 (file)
@@ -389,17 +389,11 @@ static bool disk_unlock_native_capacity(struct gendisk *disk)
        }
 }
 
-int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
 {
-       struct parsed_partitions *state = NULL;
        struct disk_part_iter piter;
        struct hd_struct *part;
-       int p, highest, res;
-rescan:
-       if (state && !IS_ERR(state)) {
-               kfree(state);
-               state = NULL;
-       }
+       int res;
 
        if (bdev->bd_part_count)
                return -EBUSY;
@@ -412,6 +406,24 @@ rescan:
                delete_partition(disk, part->partno);
        disk_part_iter_exit(&piter);
 
+       return 0;
+}
+
+int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+{
+       struct parsed_partitions *state = NULL;
+       struct hd_struct *part;
+       int p, highest, res;
+rescan:
+       if (state && !IS_ERR(state)) {
+               kfree(state);
+               state = NULL;
+       }
+
+       res = drop_partitions(disk, bdev);
+       if (res)
+               return res;
+
        if (disk->fops->revalidate_disk)
                disk->fops->revalidate_disk(disk);
        check_disk_size_change(disk, bdev);
@@ -515,6 +527,26 @@ rescan:
        return 0;
 }
 
+int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
+{
+       int res;
+
+       if (!bdev->bd_invalidated)
+               return 0;
+
+       res = drop_partitions(disk, bdev);
+       if (res)
+               return res;
+
+       set_capacity(disk, 0);
+       check_disk_size_change(disk, bdev);
+       bdev->bd_invalidated = 0;
+       /* tell userspace that the media / partition table may have changed */
+       kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
+
+       return 0;
+}
+
 unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
 {
        struct address_space *mapping = bdev->bd_inode->i_mapping;
index e086fbb..8db9089 100644 (file)
@@ -1177,7 +1177,8 @@ static bool DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T
   int TimeoutCounter;
   int i;
 
-  
+  memset(&CommandMailbox, 0, sizeof(DAC960_V1_CommandMailbox_T));
+
   if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
        return DAC960_Failure(Controller, "DMA mask out of range");
   Controller->BounceBufferLimit = DMA_BIT_MASK(32);
@@ -4627,7 +4628,8 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
   DAC960_Controller_T *Controller = Command->Controller;
   DAC960_CommandType_T CommandType = Command->CommandType;
   DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
-  DAC960_V2_IOCTL_Opcode_T CommandOpcode = CommandMailbox->Common.IOCTL_Opcode;
+  DAC960_V2_IOCTL_Opcode_T IOCTLOpcode = CommandMailbox->Common.IOCTL_Opcode;
+  DAC960_V2_CommandOpcode_T CommandOpcode = CommandMailbox->SCSI_10.CommandOpcode;
   DAC960_V2_CommandStatus_T CommandStatus = Command->V2.CommandStatus;
 
   if (CommandType == DAC960_ReadCommand ||
@@ -4699,7 +4701,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
     {
       if (Controller->ShutdownMonitoringTimer)
              return;
-      if (CommandOpcode == DAC960_V2_GetControllerInfo)
+      if (IOCTLOpcode == DAC960_V2_GetControllerInfo)
        {
          DAC960_V2_ControllerInfo_T *NewControllerInfo =
            Controller->V2.NewControllerInformation;
@@ -4719,14 +4721,14 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
          memcpy(ControllerInfo, NewControllerInfo,
                 sizeof(DAC960_V2_ControllerInfo_T));
        }
-      else if (CommandOpcode == DAC960_V2_GetEvent)
+      else if (IOCTLOpcode == DAC960_V2_GetEvent)
        {
          if (CommandStatus == DAC960_V2_NormalCompletion) {
            DAC960_V2_ReportEvent(Controller, Controller->V2.Event);
          }
          Controller->V2.NextEventSequenceNumber++;
        }
-      else if (CommandOpcode == DAC960_V2_GetPhysicalDeviceInfoValid &&
+      else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid &&
               CommandStatus == DAC960_V2_NormalCompletion)
        {
          DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo =
@@ -4915,7 +4917,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
          NewPhysicalDeviceInfo->LogicalUnit++;
          Controller->V2.PhysicalDeviceIndex++;
        }
-      else if (CommandOpcode == DAC960_V2_GetPhysicalDeviceInfoValid)
+      else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid)
        {
          unsigned int DeviceIndex;
          for (DeviceIndex = Controller->V2.PhysicalDeviceIndex;
@@ -4938,7 +4940,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
            }
          Controller->V2.NeedPhysicalDeviceInformation = false;
        }
-      else if (CommandOpcode == DAC960_V2_GetLogicalDeviceInfoValid &&
+      else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid &&
               CommandStatus == DAC960_V2_NormalCompletion)
        {
          DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo =
@@ -5065,7 +5067,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
                         [LogicalDeviceNumber] = true;
          NewLogicalDeviceInfo->LogicalDeviceNumber++;
        }
-      else if (CommandOpcode == DAC960_V2_GetLogicalDeviceInfoValid)
+      else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid)
        {
          int LogicalDriveNumber;
          for (LogicalDriveNumber = 0;
index 9baf11e..744f078 100644 (file)
@@ -3832,7 +3832,7 @@ static int __floppy_read_block_0(struct block_device *bdev)
        bio.bi_size = size;
        bio.bi_bdev = bdev;
        bio.bi_sector = 0;
-       bio.bi_flags = BIO_QUIET;
+       bio.bi_flags = (1 << BIO_QUIET);
        init_completion(&complete);
        bio.bi_private = &complete;
        bio.bi_end_io = floppy_rb0_complete;
index e7472f5..3fb6ab4 100644 (file)
@@ -1120,7 +1120,7 @@ static inline void carm_handle_resp(struct carm_host *host,
                        break;
                case MISC_GET_FW_VER: {
                        struct carm_fw_ver *ver = (struct carm_fw_ver *)
-                               mem + sizeof(struct carm_msg_get_fw_ver);
+                               (mem + sizeof(struct carm_msg_get_fw_ver));
                        if (!error) {
                                host->fw_ver = le32_to_cpu(ver->version);
                                host->flags |= (ver->features & FL_FW_VER_MASK);
index 9fec323..2a5e45d 100644 (file)
@@ -26,7 +26,6 @@
 #include <linux/module.h>
 #include <linux/kmsg_dump.h>
 #include <linux/time.h>
-#include <linux/err.h>
 #include <linux/io.h>
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
index 0c964cd..ce29e7c 100644 (file)
@@ -797,7 +797,7 @@ static int __init tlclk_init(void)
        telclk_interrupt = (inb(TLCLK_REG7) & 0x0f);
 
        if (0x0F == telclk_interrupt ) { /* not MCPBL0010 ? */
-               printk(KERN_ERR "telclk_interrup = 0x%x non-mcpbl0010 hw.\n",
+               printk(KERN_ERR "telclk_interrupt = 0x%x non-mcpbl0010 hw.\n",
                        telclk_interrupt);
                ret = -ENXIO;
                goto out3;
index ad6e64a..8b34c65 100644 (file)
@@ -976,7 +976,7 @@ int __init viotap_init(void)
 
        tape_class = class_create(THIS_MODULE, "tape");
        if (IS_ERR(tape_class)) {
-               printk(VIOTAPE_KERN_WARN "Unable to allocat class\n");
+               printk(VIOTAPE_KERN_WARN "Unable to allocate class\n");
                ret = PTR_ERR(tape_class);
                goto unreg_chrdev;
        }
index 6b5cf02..82e8820 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
+#include <linux/async.h>
 #include <asm/io.h>
 
 /*
@@ -179,17 +180,15 @@ static int verify_pmtmr_rate(void)
 /* Number of reads we try to get two different values */
 #define ACPI_PM_READ_CHECKS 10000
 
-static int __init init_acpi_pm_clocksource(void)
+static void __init acpi_pm_clocksource_async(void *unused, async_cookie_t cookie)
 {
        cycle_t value1, value2;
        unsigned int i, j = 0;
 
-       if (!pmtmr_ioport)
-               return -ENODEV;
 
        /* "verify" this timing source: */
        for (j = 0; j < ACPI_PM_MONOTONICITY_CHECKS; j++) {
-               udelay(100 * j);
+               usleep_range(100 * j, 100 * j + 100);
                value1 = clocksource_acpi_pm.read(&clocksource_acpi_pm);
                for (i = 0; i < ACPI_PM_READ_CHECKS; i++) {
                        value2 = clocksource_acpi_pm.read(&clocksource_acpi_pm);
@@ -203,25 +202,34 @@ static int __init init_acpi_pm_clocksource(void)
                               " 0x%#llx, 0x%#llx - aborting.\n",
                               value1, value2);
                        pmtmr_ioport = 0;
-                       return -EINVAL;
+                       return;
                }
                if (i == ACPI_PM_READ_CHECKS) {
                        printk(KERN_INFO "PM-Timer failed consistency check "
                               " (0x%#llx) - aborting.\n", value1);
                        pmtmr_ioport = 0;
-                       return -ENODEV;
+                       return;
                }
        }
 
        if (verify_pmtmr_rate() != 0){
                pmtmr_ioport = 0;
-               return -ENODEV;
+               return;
        }
 
-       return clocksource_register_hz(&clocksource_acpi_pm,
+       clocksource_register_hz(&clocksource_acpi_pm,
                                                PMTMR_TICKS_PER_SEC);
 }
 
+static int __init init_acpi_pm_clocksource(void)
+{
+       if (!pmtmr_ioport)
+               return -ENODEV;
+
+       async_schedule(acpi_pm_clocksource_async, NULL);
+       return 0;
+}
+
 /* We use fs_initcall because we want the PCI fixups to have run
  * but we still need to load before device_initcall
  */
index fb6b6d2..c26c369 100644 (file)
@@ -52,7 +52,6 @@ static struct clocksource clocksource_dbx500_prcmu = {
        .name           = "dbx500-prcmu-timer",
        .rating         = 300,
        .read           = clksrc_dbx500_prcmu_read,
-       .shift          = 10,
        .mask           = CLOCKSOURCE_MASK(32),
        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
@@ -90,7 +89,5 @@ void __init clksrc_dbx500_prcmu_init(void __iomem *base)
        setup_sched_clock(dbx500_prcmu_sched_clock_read,
                         32, RATE_32K);
 #endif
-       clocksource_calc_mult_shift(&clocksource_dbx500_prcmu,
-                                   RATE_32K, SCHED_CLOCK_MIN_WRAP);
-       clocksource_register(&clocksource_dbx500_prcmu);
+       clocksource_register_hz(&clocksource_dbx500_prcmu, RATE_32K);
 }
index b7dab32..540795c 100644 (file)
@@ -100,7 +100,6 @@ static struct clock_event_device cs5535_clockevent = {
        .set_mode = mfgpt_set_mode,
        .set_next_event = mfgpt_next_event,
        .rating = 250,
-       .cpumask = cpu_all_mask,
        .shift = 32
 };
 
@@ -133,7 +132,7 @@ static irqreturn_t mfgpt_tick(int irq, void *dev_id)
 
 static struct irqaction mfgptirq  = {
        .handler = mfgpt_tick,
-       .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
+       .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER | IRQF_SHARED,
        .name = DRV_NAME,
 };
 
index 72f811f..9e0998f 100644 (file)
@@ -55,11 +55,11 @@ static int __init init_cyclone_clocksource(void)
        }
        /* even on 64bit systems, this is only 32bits: */
        base = readl(reg);
+       iounmap(reg);
        if (!base) {
                printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
                return -ENODEV;
        }
-       iounmap(reg);
 
        /* setup PMCC: */
        offset = base + CYCLONE_PMCC_OFFSET;
index 27f4d96..64f9e82 100644 (file)
@@ -49,9 +49,6 @@ static cycle_t read_hrt(struct clocksource *cs)
        return (cycle_t) inl(scx200_cb_base + SCx200_TIMER_OFFSET);
 }
 
-#define HRT_SHIFT_1    22
-#define HRT_SHIFT_27   26
-
 static struct clocksource cs_hrt = {
        .name           = "scx200_hrt",
        .rating         = 250,
@@ -63,6 +60,7 @@ static struct clocksource cs_hrt = {
 
 static int __init init_hrt_clocksource(void)
 {
+       u32 freq;
        /* Make sure scx200 has initialized the configuration block */
        if (!scx200_cb_present())
                return -ENODEV;
@@ -71,7 +69,7 @@ static int __init init_hrt_clocksource(void)
        if (!request_region(scx200_cb_base + SCx200_TIMER_OFFSET,
                            SCx200_TIMER_SIZE,
                            "NatSemi SCx200 High-Resolution Timer")) {
-               printk(KERN_WARNING NAME ": unable to lock timer region\n");
+               pr_warn("unable to lock timer region\n");
                return -ENODEV;
        }
 
@@ -79,19 +77,13 @@ static int __init init_hrt_clocksource(void)
        outb(HR_TMEN | (mhz27 ? HR_TMCLKSEL : 0),
             scx200_cb_base + SCx200_TMCNFG_OFFSET);
 
-       if (mhz27) {
-               cs_hrt.shift = HRT_SHIFT_27;
-               cs_hrt.mult = clocksource_hz2mult((HRT_FREQ + ppm) * 27,
-                                                 cs_hrt.shift);
-       } else {
-               cs_hrt.shift = HRT_SHIFT_1;
-               cs_hrt.mult = clocksource_hz2mult(HRT_FREQ + ppm,
-                                                 cs_hrt.shift);
-       }
-       printk(KERN_INFO "enabling scx200 high-res timer (%s MHz +%d ppm)\n",
-               mhz27 ? "27":"1", ppm);
+       freq = (HRT_FREQ + ppm);
+       if (mhz27)
+               freq *= 27;
+
+       pr_info("enabling scx200 high-res timer (%s MHz +%d ppm)\n", mhz27 ? "27":"1", ppm);
 
-       return clocksource_register(&cs_hrt);
+       return clocksource_register_hz(&cs_hrt, freq);
 }
 
 module_init(init_hrt_clocksource);
index 59f4261..6588f43 100644 (file)
@@ -94,13 +94,13 @@ int cpuidle_idle_call(void)
 
        target_state = &drv->states[next_state];
 
-       trace_power_start(POWER_CSTATE, next_state, dev->cpu);
-       trace_cpu_idle(next_state, dev->cpu);
+       trace_power_start_rcuidle(POWER_CSTATE, next_state, dev->cpu);
+       trace_cpu_idle_rcuidle(next_state, dev->cpu);
 
        entered_state = target_state->enter(dev, drv, next_state);
 
-       trace_power_end(dev->cpu);
-       trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);
+       trace_power_end_rcuidle(dev->cpu);
+       trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
        if (entered_state >= 0) {
                /* Update cpuidle counters */
index 618bd4d..99d5527 100644 (file)
@@ -54,14 +54,14 @@ convert_to_display_mode(struct drm_display_mode *mode,
        mode->vrefresh = timing->refresh;
 
        mode->hdisplay = timing->xres;
-       mode->hsync_start = mode->hdisplay + timing->left_margin;
+       mode->hsync_start = mode->hdisplay + timing->right_margin;
        mode->hsync_end = mode->hsync_start + timing->hsync_len;
-       mode->htotal = mode->hsync_end + timing->right_margin;
+       mode->htotal = mode->hsync_end + timing->left_margin;
 
        mode->vdisplay = timing->yres;
-       mode->vsync_start = mode->vdisplay + timing->upper_margin;
+       mode->vsync_start = mode->vdisplay + timing->lower_margin;
        mode->vsync_end = mode->vsync_start + timing->vsync_len;
-       mode->vtotal = mode->vsync_end + timing->lower_margin;
+       mode->vtotal = mode->vsync_end + timing->upper_margin;
        mode->width_mm = panel->width_mm;
        mode->height_mm = panel->height_mm;
 
@@ -85,14 +85,14 @@ convert_to_video_timing(struct fb_videomode *timing,
        timing->refresh = drm_mode_vrefresh(mode);
 
        timing->xres = mode->hdisplay;
-       timing->left_margin = mode->hsync_start - mode->hdisplay;
+       timing->right_margin = mode->hsync_start - mode->hdisplay;
        timing->hsync_len = mode->hsync_end - mode->hsync_start;
-       timing->right_margin = mode->htotal - mode->hsync_end;
+       timing->left_margin = mode->htotal - mode->hsync_end;
 
        timing->yres = mode->vdisplay;
-       timing->upper_margin = mode->vsync_start - mode->vdisplay;
+       timing->lower_margin = mode->vsync_start - mode->vdisplay;
        timing->vsync_len = mode->vsync_end - mode->vsync_start;
-       timing->lower_margin = mode->vtotal - mode->vsync_end;
+       timing->upper_margin = mode->vtotal - mode->vsync_end;
 
        if (mode->flags & DRM_MODE_FLAG_INTERLACE)
                timing->vmode = FB_VMODE_INTERLACED;
index 58820eb..09cc13f 100644 (file)
@@ -246,7 +246,7 @@ static struct platform_driver exynos_drm_platform_driver = {
        .remove         = __devexit_p(exynos_drm_platform_remove),
        .driver         = {
                .owner  = THIS_MODULE,
-               .name   = DRIVER_NAME,
+               .name   = "exynos-drm",
        },
 };
 
index 3508700..54f8f07 100644 (file)
@@ -46,39 +46,13 @@ struct exynos_drm_fbdev {
        struct exynos_drm_gem_obj       *exynos_gem_obj;
 };
 
-static int exynos_drm_fbdev_set_par(struct fb_info *info)
-{
-       struct fb_var_screeninfo *var = &info->var;
-
-       switch (var->bits_per_pixel) {
-       case 32:
-       case 24:
-       case 18:
-       case 16:
-       case 12:
-               info->fix.visual = FB_VISUAL_TRUECOLOR;
-               break;
-       case 1:
-               info->fix.visual = FB_VISUAL_MONO01;
-               break;
-       default:
-               info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
-               break;
-       }
-
-       info->fix.line_length = (var->xres_virtual * var->bits_per_pixel) / 8;
-
-       return drm_fb_helper_set_par(info);
-}
-
-
 static struct fb_ops exynos_drm_fb_ops = {
        .owner          = THIS_MODULE,
        .fb_fillrect    = cfb_fillrect,
        .fb_copyarea    = cfb_copyarea,
        .fb_imageblit   = cfb_imageblit,
        .fb_check_var   = drm_fb_helper_check_var,
-       .fb_set_par     = exynos_drm_fbdev_set_par,
+       .fb_set_par     = drm_fb_helper_set_par,
        .fb_blank       = drm_fb_helper_blank,
        .fb_pan_display = drm_fb_helper_pan_display,
        .fb_setcmap     = drm_fb_helper_setcmap,
index 360adf2..56458ee 100644 (file)
@@ -817,8 +817,6 @@ static int __devinit fimd_probe(struct platform_device *pdev)
                goto err_clk_get;
        }
 
-       clk_enable(ctx->bus_clk);
-
        ctx->lcd_clk = clk_get(dev, "sclk_fimd");
        if (IS_ERR(ctx->lcd_clk)) {
                dev_err(dev, "failed to get lcd clock\n");
@@ -826,8 +824,6 @@ static int __devinit fimd_probe(struct platform_device *pdev)
                goto err_bus_clk;
        }
 
-       clk_enable(ctx->lcd_clk);
-
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (!res) {
                dev_err(dev, "failed to find registers\n");
@@ -864,17 +860,11 @@ static int __devinit fimd_probe(struct platform_device *pdev)
                goto err_req_irq;
        }
 
-       ctx->clkdiv = fimd_calc_clkdiv(ctx, &panel->timing);
        ctx->vidcon0 = pdata->vidcon0;
        ctx->vidcon1 = pdata->vidcon1;
        ctx->default_win = pdata->default_win;
        ctx->panel = panel;
 
-       panel->timing.pixclock = clk_get_rate(ctx->lcd_clk) / ctx->clkdiv;
-
-       DRM_DEBUG_KMS("pixel clock = %d, clkdiv = %d\n",
-                       panel->timing.pixclock, ctx->clkdiv);
-
        subdrv = &ctx->subdrv;
 
        subdrv->probe = fimd_subdrv_probe;
@@ -889,10 +879,15 @@ static int __devinit fimd_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, ctx);
 
-       pm_runtime_set_active(dev);
        pm_runtime_enable(dev);
        pm_runtime_get_sync(dev);
 
+       ctx->clkdiv = fimd_calc_clkdiv(ctx, &panel->timing);
+       panel->timing.pixclock = clk_get_rate(ctx->lcd_clk) / ctx->clkdiv;
+
+       DRM_DEBUG_KMS("pixel clock = %d, clkdiv = %d\n",
+                       panel->timing.pixclock, ctx->clkdiv);
+
        for (win = 0; win < WINDOWS_NR; win++)
                fimd_clear_win(ctx, win);
 
index 4a5b099..53404af 100644 (file)
@@ -321,6 +321,8 @@ static int cdv_chip_setup(struct drm_device *dev)
        cdv_get_core_freq(dev);
        gma_intel_opregion_init(dev);
        psb_intel_init_bios(dev);
+       REG_WRITE(PORT_HOTPLUG_EN, 0);
+       REG_WRITE(PORT_HOTPLUG_STAT, REG_READ(PORT_HOTPLUG_STAT));
        return 0;
 }
 
index 830dfdd..be61673 100644 (file)
@@ -247,7 +247,6 @@ static struct fb_ops psbfb_roll_ops = {
        .fb_imageblit = cfb_imageblit,
        .fb_pan_display = psbfb_pan,
        .fb_mmap = psbfb_mmap,
-       .fb_sync = psbfb_sync,
        .fb_ioctl = psbfb_ioctl,
 };
 
index 5d5330f..aff194f 100644 (file)
@@ -446,10 +446,9 @@ int psb_gtt_init(struct drm_device *dev, int resume)
        pg->gtt_start = pci_resource_start(dev->pdev, PSB_GTT_RESOURCE);
        gtt_pages = pci_resource_len(dev->pdev, PSB_GTT_RESOURCE)
                                                                >> PAGE_SHIFT;
-       /* Some CDV firmware doesn't report this currently. In which case the
-          system has 64 gtt pages */
+       /* CDV doesn't report this. In which case the system has 64 gtt pages */
        if (pg->gtt_start == 0 || gtt_pages == 0) {
-               dev_err(dev->dev, "GTT PCI BAR not initialized.\n");
+               dev_dbg(dev->dev, "GTT PCI BAR not initialized.\n");
                gtt_pages = 64;
                pg->gtt_start = dev_priv->pge_ctl;
        }
@@ -461,10 +460,10 @@ int psb_gtt_init(struct drm_device *dev, int resume)
 
        if (pg->gatt_pages == 0 || pg->gatt_start == 0) {
                static struct resource fudge;   /* Preferably peppermint */
-               /* This can occur on CDV SDV systems. Fudge it in this case.
+               /* This can occur on CDV systems. Fudge it in this case.
                   We really don't care what imaginary space is being allocated
                   at this point */
-               dev_err(dev->dev, "GATT PCI BAR not initialized.\n");
+               dev_dbg(dev->dev, "GATT PCI BAR not initialized.\n");
                pg->gatt_start = 0x40000000;
                pg->gatt_pages = (128 * 1024 * 1024) >> PAGE_SHIFT;
                /* This is a little confusing but in fact the GTT is providing
index 03c53fc..558ac71 100644 (file)
 #define   DVS_FORMAT_RGBX888   (2<<25)
 #define   DVS_FORMAT_RGBX161616        (3<<25)
 #define   DVS_SOURCE_KEY       (1<<22)
-#define   DVS_RGB_ORDER_RGBX   (1<<20)
+#define   DVS_RGB_ORDER_XBGR   (1<<20)
 #define   DVS_YUV_BYTE_ORDER_MASK (3<<16)
 #define   DVS_YUV_ORDER_YUYV   (0<<16)
 #define   DVS_YUV_ORDER_UYVY   (1<<16)
index f851db7..397087c 100644 (file)
@@ -7828,6 +7828,7 @@ int intel_framebuffer_init(struct drm_device *dev,
        case DRM_FORMAT_RGB332:
        case DRM_FORMAT_RGB565:
        case DRM_FORMAT_XRGB8888:
+       case DRM_FORMAT_XBGR8888:
        case DRM_FORMAT_ARGB8888:
        case DRM_FORMAT_XRGB2101010:
        case DRM_FORMAT_ARGB2101010:
index 2288abf..a083504 100644 (file)
@@ -225,16 +225,16 @@ snb_update_plane(struct drm_plane *plane, struct drm_framebuffer *fb,
 
        /* Mask out pixel format bits in case we change it */
        dvscntr &= ~DVS_PIXFORMAT_MASK;
-       dvscntr &= ~DVS_RGB_ORDER_RGBX;
+       dvscntr &= ~DVS_RGB_ORDER_XBGR;
        dvscntr &= ~DVS_YUV_BYTE_ORDER_MASK;
 
        switch (fb->pixel_format) {
        case DRM_FORMAT_XBGR8888:
-               dvscntr |= DVS_FORMAT_RGBX888;
+               dvscntr |= DVS_FORMAT_RGBX888 | DVS_RGB_ORDER_XBGR;
                pixel_size = 4;
                break;
        case DRM_FORMAT_XRGB8888:
-               dvscntr |= DVS_FORMAT_RGBX888 | DVS_RGB_ORDER_RGBX;
+               dvscntr |= DVS_FORMAT_RGBX888;
                pixel_size = 4;
                break;
        case DRM_FORMAT_YUYV:
index fbcd848..17ca72c 100644 (file)
@@ -2362,6 +2362,9 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
        uint64_t addr = semaphore->gpu_addr;
        unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
 
+       if (rdev->family < CHIP_CAYMAN)
+               sel |= PACKET3_SEM_WAIT_ON_SIGNAL;
+
        radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
        radeon_ring_write(ring, addr & 0xffffffff);
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
index 2d1f6c5..73e2c7c 100644 (file)
@@ -314,6 +314,10 @@ const u32 r6xx_default_state[] =
        0x00000000, /* VGT_VTX_CNT_EN */
 
        0xc0016900,
+       0x000000d4,
+       0x00000000, /* SX_MISC */
+
+       0xc0016900,
        0x000002c8,
        0x00000000, /* VGT_STRMOUT_BUFFER_EN */
 
@@ -626,6 +630,10 @@ const u32 r7xx_default_state[] =
        0x00000000, /* VGT_VTX_CNT_EN */
 
        0xc0016900,
+       0x000000d4,
+       0x00000000, /* SX_MISC */
+
+       0xc0016900,
        0x000002c8,
        0x00000000, /* VGT_STRMOUT_BUFFER_EN */
 
index 3ee1fd7..9b23670 100644 (file)
 #define        PACKET3_STRMOUT_BUFFER_UPDATE                   0x34
 #define        PACKET3_INDIRECT_BUFFER_MP                      0x38
 #define        PACKET3_MEM_SEMAPHORE                           0x39
+#              define PACKET3_SEM_WAIT_ON_SIGNAL    (0x1 << 12)
 #              define PACKET3_SEM_SEL_SIGNAL       (0x6 << 29)
 #              define PACKET3_SEM_SEL_WAIT         (0x7 << 29)
 #define        PACKET3_MPEG_INDEX                              0x3A
index 8b3d8ed..8c9a811 100644 (file)
@@ -1057,7 +1057,7 @@ static int radeon_dvi_mode_valid(struct drm_connector *connector,
                    (radeon_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_B))
                        return MODE_OK;
                else if (radeon_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_A) {
-                       if (ASIC_IS_DCE3(rdev)) {
+                       if (0) {
                                /* HDMI 1.3+ supports max clock of 340 Mhz */
                                if (mode->clock > 340000)
                                        return MODE_CLOCK_HIGH;
index 8c49fef..3d31433 100644 (file)
@@ -1078,15 +1078,21 @@ static const struct drm_framebuffer_funcs radeon_fb_funcs = {
        .create_handle = radeon_user_framebuffer_create_handle,
 };
 
-void
+int
 radeon_framebuffer_init(struct drm_device *dev,
                        struct radeon_framebuffer *rfb,
                        struct drm_mode_fb_cmd2 *mode_cmd,
                        struct drm_gem_object *obj)
 {
+       int ret;
        rfb->obj = obj;
-       drm_framebuffer_init(dev, &rfb->base, &radeon_fb_funcs);
+       ret = drm_framebuffer_init(dev, &rfb->base, &radeon_fb_funcs);
+       if (ret) {
+               rfb->obj = NULL;
+               return ret;
+       }
        drm_helper_mode_fill_fb_struct(&rfb->base, mode_cmd);
+       return 0;
 }
 
 static struct drm_framebuffer *
@@ -1096,6 +1102,7 @@ radeon_user_framebuffer_create(struct drm_device *dev,
 {
        struct drm_gem_object *obj;
        struct radeon_framebuffer *radeon_fb;
+       int ret;
 
        obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
        if (obj ==  NULL) {
@@ -1108,7 +1115,12 @@ radeon_user_framebuffer_create(struct drm_device *dev,
        if (radeon_fb == NULL)
                return ERR_PTR(-ENOMEM);
 
-       radeon_framebuffer_init(dev, radeon_fb, mode_cmd, obj);
+       ret = radeon_framebuffer_init(dev, radeon_fb, mode_cmd, obj);
+       if (ret) {
+               kfree(radeon_fb);
+               drm_gem_object_unreference_unlocked(obj);
+               return NULL;
+       }
 
        return &radeon_fb->base;
 }
index 9419c51..26e9270 100644 (file)
@@ -307,8 +307,6 @@ void radeon_panel_mode_fixup(struct drm_encoder *encoder,
 bool radeon_dig_monitor_is_duallink(struct drm_encoder *encoder,
                                    u32 pixel_clock)
 {
-       struct drm_device *dev = encoder->dev;
-       struct radeon_device *rdev = dev->dev_private;
        struct drm_connector *connector;
        struct radeon_connector *radeon_connector;
        struct radeon_connector_atom_dig *dig_connector;
@@ -326,7 +324,7 @@ bool radeon_dig_monitor_is_duallink(struct drm_encoder *encoder,
        case DRM_MODE_CONNECTOR_HDMIB:
                if (radeon_connector->use_digital) {
                        /* HDMI 1.3 supports up to 340 Mhz over single link */
-                       if (ASIC_IS_DCE3(rdev) && drm_detect_hdmi_monitor(radeon_connector->edid)) {
+                       if (0 && drm_detect_hdmi_monitor(radeon_connector->edid)) {
                                if (pixel_clock > 340000)
                                        return true;
                                else
@@ -348,7 +346,7 @@ bool radeon_dig_monitor_is_duallink(struct drm_encoder *encoder,
                        return false;
                else {
                        /* HDMI 1.3 supports up to 340 Mhz over single link */
-                       if (ASIC_IS_DCE3(rdev) && drm_detect_hdmi_monitor(radeon_connector->edid)) {
+                       if (0 && drm_detect_hdmi_monitor(radeon_connector->edid)) {
                                if (pixel_clock > 340000)
                                        return true;
                                else
index cf2bf35..195471c 100644 (file)
@@ -209,6 +209,11 @@ static int radeonfb_create(struct radeon_fbdev *rfbdev,
                                                          sizes->surface_depth);
 
        ret = radeonfb_create_pinned_object(rfbdev, &mode_cmd, &gobj);
+       if (ret) {
+               DRM_ERROR("failed to create fbcon object %d\n", ret);
+               return ret;
+       }
+
        rbo = gem_to_radeon_bo(gobj);
 
        /* okay we have an object now allocate the framebuffer */
@@ -220,7 +225,11 @@ static int radeonfb_create(struct radeon_fbdev *rfbdev,
 
        info->par = rfbdev;
 
-       radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
+       ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
+       if (ret) {
+               DRM_ERROR("failed to initalise framebuffer %d\n", ret);
+               goto out_unref;
+       }
 
        fb = &rfbdev->rfb.base;
 
index 4330e32..8a85598 100644 (file)
@@ -649,7 +649,7 @@ extern void radeon_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green,
                                     u16 blue, int regno);
 extern void radeon_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green,
                                     u16 *blue, int regno);
-void radeon_framebuffer_init(struct drm_device *dev,
+int radeon_framebuffer_init(struct drm_device *dev,
                             struct radeon_framebuffer *rfb,
                             struct drm_mode_fb_cmd2 *mode_cmd,
                             struct drm_gem_object *obj);
index b8574cd..63552e3 100644 (file)
@@ -59,6 +59,9 @@
 #define USB_VENDOR_ID_AIRCABLE         0x16CA
 #define USB_DEVICE_ID_AIRCABLE1                0x1502
 
+#define USB_VENDOR_ID_AIREN            0x1a2c
+#define USB_DEVICE_ID_AIREN_SLIMPLUS   0x0002
+
 #define USB_VENDOR_ID_ALCOR            0x058f
 #define USB_DEVICE_ID_ALCOR_USBRS232   0x9720
 
index 9333d69..627850a 100644 (file)
@@ -986,8 +986,13 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct
                return;
        }
 
-       /* Ignore out-of-range values as per HID specification, section 5.10 */
-       if (value < field->logical_minimum || value > field->logical_maximum) {
+       /*
+        * Ignore out-of-range values as per HID specification,
+        * section 5.10 and 6.2.25
+        */
+       if ((field->flags & HID_MAIN_ITEM_VARIABLE) &&
+           (value < field->logical_minimum ||
+            value > field->logical_maximum)) {
                dbg_hid("Ignoring out-of-range value %x\n", value);
                return;
        }
index c831af9..57d4e1e 100644 (file)
@@ -54,6 +54,7 @@ static const struct hid_blacklist {
        { USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII, HID_QUIRK_MULTI_INPUT },
        { USB_VENDOR_ID_TOUCHPACK, USB_DEVICE_ID_TOUCHPACK_RTS, HID_QUIRK_MULTI_INPUT },
 
+       { USB_VENDOR_ID_AIREN, USB_DEVICE_ID_AIREN_SLIMPLUS, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS124U, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM, HID_QUIRK_NOGET },
index 0226040..dad895f 100644 (file)
@@ -497,8 +497,9 @@ config SENSORS_JC42
          If you say yes here, you get support for JEDEC JC42.4 compliant
          temperature sensors, which are used on many DDR3 memory modules for
          mobile devices and servers.  Support will include, but not be limited
-         to, ADT7408, CAT34TS02, CAT6095, MAX6604, MCP9805, MCP98242, MCP98243,
-         MCP9843, SE97, SE98, STTS424(E), TSE2002B3, and TS3000B3.
+         to, ADT7408, AT30TS00, CAT34TS02, CAT6095, MAX6604, MCP9804, MCP9805,
+         MCP98242, MCP98243, MCP9843, SE97, SE98, STTS424(E), STTS2002,
+         STTS3000, TSE2002B3, TSE2002GB2, TS3000B3, and TS3000GB2.
 
          This driver can also be built as a module.  If so, the module
          will be called jc42.
index 28c09ee..b927ee5 100644 (file)
@@ -64,6 +64,7 @@ static const unsigned short normal_i2c[] = {
 
 /* Manufacturer IDs */
 #define ADT_MANID              0x11d4  /* Analog Devices */
+#define ATMEL_MANID            0x001f  /* Atmel */
 #define MAX_MANID              0x004d  /* Maxim */
 #define IDT_MANID              0x00b3  /* IDT */
 #define MCP_MANID              0x0054  /* Microchip */
@@ -77,15 +78,25 @@ static const unsigned short normal_i2c[] = {
 #define ADT7408_DEVID          0x0801
 #define ADT7408_DEVID_MASK     0xffff
 
+/* Atmel */
+#define AT30TS00_DEVID         0x8201
+#define AT30TS00_DEVID_MASK    0xffff
+
 /* IDT */
 #define TS3000B3_DEVID         0x2903  /* Also matches TSE2002B3 */
 #define TS3000B3_DEVID_MASK    0xffff
 
+#define TS3000GB2_DEVID                0x2912  /* Also matches TSE2002GB2 */
+#define TS3000GB2_DEVID_MASK   0xffff
+
 /* Maxim */
 #define MAX6604_DEVID          0x3e00
 #define MAX6604_DEVID_MASK     0xffff
 
 /* Microchip */
+#define MCP9804_DEVID          0x0200
+#define MCP9804_DEVID_MASK     0xfffc
+
 #define MCP98242_DEVID         0x2000
 #define MCP98242_DEVID_MASK    0xfffc
 
@@ -113,6 +124,12 @@ static const unsigned short normal_i2c[] = {
 #define STTS424E_DEVID         0x0000
 #define STTS424E_DEVID_MASK    0xfffe
 
+#define STTS2002_DEVID         0x0300
+#define STTS2002_DEVID_MASK    0xffff
+
+#define STTS3000_DEVID         0x0200
+#define STTS3000_DEVID_MASK    0xffff
+
 static u16 jc42_hysteresis[] = { 0, 1500, 3000, 6000 };
 
 struct jc42_chips {
@@ -123,8 +140,11 @@ struct jc42_chips {
 
 static struct jc42_chips jc42_chips[] = {
        { ADT_MANID, ADT7408_DEVID, ADT7408_DEVID_MASK },
+       { ATMEL_MANID, AT30TS00_DEVID, AT30TS00_DEVID_MASK },
        { IDT_MANID, TS3000B3_DEVID, TS3000B3_DEVID_MASK },
+       { IDT_MANID, TS3000GB2_DEVID, TS3000GB2_DEVID_MASK },
        { MAX_MANID, MAX6604_DEVID, MAX6604_DEVID_MASK },
+       { MCP_MANID, MCP9804_DEVID, MCP9804_DEVID_MASK },
        { MCP_MANID, MCP98242_DEVID, MCP98242_DEVID_MASK },
        { MCP_MANID, MCP98243_DEVID, MCP98243_DEVID_MASK },
        { MCP_MANID, MCP9843_DEVID, MCP9843_DEVID_MASK },
@@ -133,6 +153,8 @@ static struct jc42_chips jc42_chips[] = {
        { NXP_MANID, SE98_DEVID, SE98_DEVID_MASK },
        { STM_MANID, STTS424_DEVID, STTS424_DEVID_MASK },
        { STM_MANID, STTS424E_DEVID, STTS424E_DEVID_MASK },
+       { STM_MANID, STTS2002_DEVID, STTS2002_DEVID_MASK },
+       { STM_MANID, STTS3000_DEVID, STTS3000_DEVID_MASK },
 };
 
 /* Each client has this additional data */
@@ -159,10 +181,12 @@ static struct jc42_data *jc42_update_device(struct device *dev);
 
 static const struct i2c_device_id jc42_id[] = {
        { "adt7408", 0 },
+       { "at30ts00", 0 },
        { "cat94ts02", 0 },
        { "cat6095", 0 },
        { "jc42", 0 },
        { "max6604", 0 },
+       { "mcp9804", 0 },
        { "mcp9805", 0 },
        { "mcp98242", 0 },
        { "mcp98243", 0 },
@@ -171,8 +195,10 @@ static const struct i2c_device_id jc42_id[] = {
        { "se97b", 0 },
        { "se98", 0 },
        { "stts424", 0 },
-       { "tse2002b3", 0 },
-       { "ts3000b3", 0 },
+       { "stts2002", 0 },
+       { "stts3000", 0 },
+       { "tse2002", 0 },
+       { "ts3000", 0 },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, jc42_id);
index 00460d8..d89b339 100644 (file)
@@ -54,7 +54,8 @@
                                                   lcrit_alarm, crit_alarm */
 #define PMBUS_IOUT_BOOLEANS_PER_PAGE   3       /* alarm, lcrit_alarm,
                                                   crit_alarm */
-#define PMBUS_POUT_BOOLEANS_PER_PAGE   2       /* alarm, crit_alarm */
+#define PMBUS_POUT_BOOLEANS_PER_PAGE   3       /* cap_alarm, alarm, crit_alarm
+                                                */
 #define PMBUS_MAX_BOOLEANS_PER_FAN     2       /* alarm, fault */
 #define PMBUS_MAX_BOOLEANS_PER_TEMP    4       /* min_alarm, max_alarm,
                                                   lcrit_alarm, crit_alarm */
index 48c7b4a..e3e8420 100644 (file)
@@ -33,6 +33,7 @@ enum chips { zl2004, zl2005, zl2006, zl2008, zl2105, zl2106, zl6100, zl6105 };
 struct zl6100_data {
        int id;
        ktime_t access;         /* chip access time */
+       int delay;              /* Delay between chip accesses in uS */
        struct pmbus_driver_info info;
 };
 
@@ -52,10 +53,10 @@ MODULE_PARM_DESC(delay, "Delay between chip accesses in uS");
 /* Some chips need a delay between accesses */
 static inline void zl6100_wait(const struct zl6100_data *data)
 {
-       if (delay) {
+       if (data->delay) {
                s64 delta = ktime_us_delta(ktime_get(), data->access);
-               if (delta < delay)
-                       udelay(delay - delta);
+               if (delta < data->delay)
+                       udelay(data->delay - delta);
        }
 }
 
@@ -199,16 +200,11 @@ static int zl6100_probe(struct i2c_client *client,
        data->id = mid->driver_data;
 
        /*
-        * ZL2005, ZL2008, ZL2105, and ZL6100 are known to require a wait time
-        * between I2C accesses. ZL2004 and ZL6105 are known to be safe.
-        * Other chips have not yet been tested.
-        *
-        * Only clear the wait time for chips known to be safe. The wait time
-        * can be cleared later for additional chips if tests show that it
-        * is not needed (in other words, better be safe than sorry).
+        * According to information from the chip vendor, all currently
+        * supported chips are known to require a wait time between I2C
+        * accesses.
         */
-       if (data->id == zl2004 || data->id == zl6105)
-               delay = 0;
+       data->delay = delay;
 
        /*
         * Since there was a direct I2C device access above, wait before
index 5276d19..a658d62 100644 (file)
@@ -39,7 +39,7 @@
                                               0x8860 0xa1
     w83627dhg    9      5       4       3      0xa020 0xc1    0x5ca3
     w83627dhg-p  9      5       4       3      0xb070 0xc1    0x5ca3
-    w83627uhg    8      2       2       2      0xa230 0xc1    0x5ca3
+    w83627uhg    8      2       2       3      0xa230 0xc1    0x5ca3
     w83667hg     9      5       3       3      0xa510 0xc1    0x5ca3
     w83667hg-b   9      5       3       4      0xb350 0xc1    0x5ca3
     nct6775f     9      4       3       9      0xb470 0xc1    0x5ca3
@@ -1607,7 +1607,7 @@ store_##reg(struct device *dev, struct device_attribute *attr, \
        val = step_time_to_reg(val, data->pwm_mode[nr]); \
        mutex_lock(&data->update_lock); \
        data->reg[nr] = val; \
-       w83627ehf_write_value(data, W83627EHF_REG_##REG[nr], val); \
+       w83627ehf_write_value(data, data->REG_##REG[nr], val); \
        mutex_unlock(&data->update_lock); \
        return count; \
 } \
@@ -2004,7 +2004,8 @@ static int __devinit w83627ehf_probe(struct platform_device *pdev)
                goto exit;
        }
 
-       data = kzalloc(sizeof(struct w83627ehf_data), GFP_KERNEL);
+       data = devm_kzalloc(&pdev->dev, sizeof(struct w83627ehf_data),
+                           GFP_KERNEL);
        if (!data) {
                err = -ENOMEM;
                goto exit_release;
@@ -2157,16 +2158,16 @@ static int __devinit w83627ehf_probe(struct platform_device *pdev)
                w83627ehf_set_temp_reg_ehf(data, 3);
 
                /*
-                * Temperature sources for temp1 and temp2 are selected with
+                * Temperature sources for temp2 and temp3 are selected with
                 * bank 0, registers 0x49 and 0x4a.
                 */
                data->temp_src[0] = 0;  /* SYSTIN */
                reg = w83627ehf_read_value(data, 0x49) & 0x07;
                /* Adjust to have the same mapping as other source registers */
                if (reg == 0)
-                       data->temp_src[1]++;
+                       data->temp_src[1] = 1;
                else if (reg >= 2 && reg <= 5)
-                       data->temp_src[1] += 2;
+                       data->temp_src[1] = reg + 2;
                else    /* should never happen */
                        data->have_temp &= ~(1 << 1);
                reg = w83627ehf_read_value(data, 0x4a);
@@ -2493,9 +2494,8 @@ static int __devinit w83627ehf_probe(struct platform_device *pdev)
 
 exit_remove:
        w83627ehf_device_remove_files(dev);
-       kfree(data);
-       platform_set_drvdata(pdev, NULL);
 exit_release:
+       platform_set_drvdata(pdev, NULL);
        release_region(res->start, IOREGION_LENGTH);
 exit:
        return err;
@@ -2509,7 +2509,6 @@ static int __devexit w83627ehf_remove(struct platform_device *pdev)
        w83627ehf_device_remove_files(&pdev->dev);
        release_region(data->addr, IOREGION_LENGTH);
        platform_set_drvdata(pdev, NULL);
-       kfree(data);
 
        return 0;
 }
index 525c734..24f94f4 100644 (file)
@@ -103,8 +103,14 @@ static int sclhi(struct i2c_algo_bit_data *adap)
                 * chips may hold it low ("clock stretching") while they
                 * are processing data internally.
                 */
-               if (time_after(jiffies, start + adap->timeout))
+               if (time_after(jiffies, start + adap->timeout)) {
+                       /* Test one last time, as we may have been preempted
+                        * between last check and timeout test.
+                        */
+                       if (getscl(adap))
+                               break;
                        return -ETIMEDOUT;
+               }
                cond_resched();
        }
 #ifdef DEBUG
index 1e56061..e9c1893 100644 (file)
@@ -1386,8 +1386,10 @@ int i2c_master_send(const struct i2c_client *client, const char *buf, int count)
 
        ret = i2c_transfer(adap, &msg, 1);
 
-       /* If everything went ok (i.e. 1 msg transmitted), return #bytes
-          transmitted, else error code. */
+       /*
+        * If everything went ok (i.e. 1 msg transmitted), return #bytes
+        * transmitted, else error code.
+        */
        return (ret == 1) ? count : ret;
 }
 EXPORT_SYMBOL(i2c_master_send);
@@ -1414,8 +1416,10 @@ int i2c_master_recv(const struct i2c_client *client, char *buf, int count)
 
        ret = i2c_transfer(adap, &msg, 1);
 
-       /* If everything went ok (i.e. 1 msg transmitted), return #bytes
-          transmitted, else error code. */
+       /*
+        * If everything went ok (i.e. 1 msg received), return #bytes received,
+        * else error code.
+        */
        return (ret == 1) ? count : ret;
 }
 EXPORT_SYMBOL(i2c_master_recv);
index afc166f..7df5bfe 100644 (file)
@@ -332,7 +332,7 @@ static ssize_t evdev_write(struct file *file, const char __user *buffer,
        struct evdev_client *client = file->private_data;
        struct evdev *evdev = client->evdev;
        struct input_event event;
-       int retval;
+       int retval = 0;
 
        if (count < input_event_size())
                return -EINVAL;
index 3765137..f3bc418 100644 (file)
@@ -172,7 +172,7 @@ static void twl4030_vibra_close(struct input_dev *input)
 }
 
 /*** Module ***/
-#if CONFIG_PM
+#if CONFIG_PM_SLEEP
 static int twl4030_vibra_suspend(struct device *dev)
 {
        struct platform_device *pdev = to_platform_device(dev);
@@ -189,10 +189,10 @@ static int twl4030_vibra_resume(struct device *dev)
        vibra_disable_leds();
        return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(twl4030_vibra_pm_ops,
                         twl4030_vibra_suspend, twl4030_vibra_resume);
-#endif
 
 static int __devinit twl4030_vibra_probe(struct platform_device *pdev)
 {
@@ -273,9 +273,7 @@ static struct platform_driver twl4030_vibra_driver = {
        .driver         = {
                .name   = "twl4030-vibra",
                .owner  = THIS_MODULE,
-#ifdef CONFIG_PM
                .pm     = &twl4030_vibra_pm_ops,
-#endif
        },
 };
 module_platform_driver(twl4030_vibra_driver);
index bd87380..4c6a72d 100644 (file)
@@ -952,7 +952,9 @@ static const struct alps_model_info *alps_get_model(struct psmouse *psmouse, int
 
        /*
         * First try "E6 report".
-        * ALPS should return 0,0,10 or 0,0,100
+        * ALPS should return 0,0,10 or 0,0,100 if no buttons are pressed.
+        * The bits 0-2 of the first byte will be 1s if some buttons are
+        * pressed.
         */
        param[0] = 0;
        if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES) ||
@@ -968,7 +970,8 @@ static const struct alps_model_info *alps_get_model(struct psmouse *psmouse, int
        psmouse_dbg(psmouse, "E6 report: %2.2x %2.2x %2.2x",
                    param[0], param[1], param[2]);
 
-       if (param[0] != 0 || param[1] != 0 || (param[2] != 10 && param[2] != 100))
+       if ((param[0] & 0xf8) != 0 || param[1] != 0 ||
+           (param[2] != 10 && param[2] != 100))
                return NULL;
 
        /*
index 58a8775..e53f408 100644 (file)
@@ -77,6 +77,8 @@ config TABLET_USB_WACOM
        tristate "Wacom Intuos/Graphire tablet support (USB)"
        depends on USB_ARCH_HAS_HCD
        select USB
+       select NEW_LEDS
+       select LEDS_CLASS
        help
          Say Y here if you want to use the USB version of the Wacom Intuos
          or Graphire tablet.  Make sure to say Y to "Mouse support"
index 88672ec..cd3ed29 100644 (file)
@@ -926,7 +926,7 @@ static int wacom_bpt3_touch(struct wacom_wac *wacom)
 {
        struct input_dev *input = wacom->input;
        unsigned char *data = wacom->data;
-       int count = data[1] & 0x03;
+       int count = data[1] & 0x07;
        int i;
 
        if (data[0] != 0x02)
index bdea288..a35e98a 100644 (file)
@@ -275,7 +275,7 @@ static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 }
 
 /* Programs the physical address of the device table into the IOMMU hardware */
-static void __init iommu_set_device_table(struct amd_iommu *iommu)
+static void iommu_set_device_table(struct amd_iommu *iommu)
 {
        u64 entry;
 
index c9c6053..a08a534 100644 (file)
@@ -48,8 +48,6 @@
 #define ROOT_SIZE              VTD_PAGE_SIZE
 #define CONTEXT_SIZE           VTD_PAGE_SIZE
 
-#define IS_BRIDGE_HOST_DEVICE(pdev) \
-                           ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
@@ -356,10 +354,18 @@ static int hw_pass_through = 1;
 /* si_domain contains multiple devices */
 #define DOMAIN_FLAG_STATIC_IDENTITY    (1 << 2)
 
+/* define the limit of IOMMUs supported in each domain */
+#ifdef CONFIG_X86
+# define       IOMMU_UNITS_SUPPORTED   MAX_IO_APICS
+#else
+# define       IOMMU_UNITS_SUPPORTED   64
+#endif
+
 struct dmar_domain {
        int     id;                     /* domain id */
        int     nid;                    /* node id */
-       unsigned long iommu_bmp;        /* bitmap of iommus this domain uses*/
+       DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
+                                       /* bitmap of iommus this domain uses*/
 
        struct list_head devices;       /* all devices' list */
        struct iova_domain iovad;       /* iova's that belong to this domain */
@@ -571,7 +577,7 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
        BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
        BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
 
-       iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
+       iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
        if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
                return NULL;
 
@@ -584,7 +590,7 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
 
        domain->iommu_coherency = 1;
 
-       for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
+       for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
                if (!ecap_coherent(g_iommus[i]->ecap)) {
                        domain->iommu_coherency = 0;
                        break;
@@ -598,7 +604,7 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain)
 
        domain->iommu_snooping = 1;
 
-       for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
+       for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
                if (!ecap_sc_support(g_iommus[i]->ecap)) {
                        domain->iommu_snooping = 0;
                        break;
@@ -1334,7 +1340,7 @@ static struct dmar_domain *alloc_domain(void)
                return NULL;
 
        domain->nid = -1;
-       memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+       memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
        domain->flags = 0;
 
        return domain;
@@ -1360,7 +1366,7 @@ static int iommu_attach_domain(struct dmar_domain *domain,
 
        domain->id = num;
        set_bit(num, iommu->domain_ids);
-       set_bit(iommu->seq_id, &domain->iommu_bmp);
+       set_bit(iommu->seq_id, domain->iommu_bmp);
        iommu->domains[num] = domain;
        spin_unlock_irqrestore(&iommu->lock, flags);
 
@@ -1385,7 +1391,7 @@ static void iommu_detach_domain(struct dmar_domain *domain,
 
        if (found) {
                clear_bit(num, iommu->domain_ids);
-               clear_bit(iommu->seq_id, &domain->iommu_bmp);
+               clear_bit(iommu->seq_id, domain->iommu_bmp);
                iommu->domains[num] = NULL;
        }
        spin_unlock_irqrestore(&iommu->lock, flags);
@@ -1527,7 +1533,7 @@ static void domain_exit(struct dmar_domain *domain)
        dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
 
        for_each_active_iommu(iommu, drhd)
-               if (test_bit(iommu->seq_id, &domain->iommu_bmp))
+               if (test_bit(iommu->seq_id, domain->iommu_bmp))
                        iommu_detach_domain(domain, iommu);
 
        free_domain_mem(domain);
@@ -1653,7 +1659,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
        spin_unlock_irqrestore(&iommu->lock, flags);
 
        spin_lock_irqsave(&domain->iommu_lock, flags);
-       if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
+       if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
                domain->iommu_count++;
                if (domain->iommu_count == 1)
                        domain->nid = iommu->node;
@@ -2369,18 +2375,18 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
                return -EFAULT;
 
        for_each_pci_dev(pdev) {
-               /* Skip Host/PCI Bridge devices */
-               if (IS_BRIDGE_HOST_DEVICE(pdev))
-                       continue;
                if (iommu_should_identity_map(pdev, 1)) {
-                       printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
-                              hw ? "hardware" : "software", pci_name(pdev));
-
                        ret = domain_add_dev_info(si_domain, pdev,
-                                                    hw ? CONTEXT_TT_PASS_THROUGH :
-                                                    CONTEXT_TT_MULTI_LEVEL);
-                       if (ret)
+                                            hw ? CONTEXT_TT_PASS_THROUGH :
+                                                 CONTEXT_TT_MULTI_LEVEL);
+                       if (ret) {
+                               /* device not associated with an iommu */
+                               if (ret == -ENODEV)
+                                       continue;
                                return ret;
+                       }
+                       pr_info("IOMMU: %s identity mapping for device %s\n",
+                               hw ? "hardware" : "software", pci_name(pdev));
                }
        }
 
@@ -2402,12 +2408,17 @@ static int __init init_dmars(void)
         * endfor
         */
        for_each_drhd_unit(drhd) {
-               g_num_of_iommus++;
                /*
                 * lock not needed as this is only incremented in the single
                 * threaded kernel __init code path all other access are read
                 * only
                 */
+               if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
+                       g_num_of_iommus++;
+                       continue;
+               }
+               printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
+                         IOMMU_UNITS_SUPPORTED);
        }
 
        g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
@@ -3748,7 +3759,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
        if (found == 0) {
                unsigned long tmp_flags;
                spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
-               clear_bit(iommu->seq_id, &domain->iommu_bmp);
+               clear_bit(iommu->seq_id, domain->iommu_bmp);
                domain->iommu_count--;
                domain_update_iommu_cap(domain);
                spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
@@ -3790,7 +3801,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
                 */
                spin_lock_irqsave(&domain->iommu_lock, flags2);
                if (test_and_clear_bit(iommu->seq_id,
-                                      &domain->iommu_bmp)) {
+                                      domain->iommu_bmp)) {
                        domain->iommu_count--;
                        domain_update_iommu_cap(domain);
                }
@@ -3815,7 +3826,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void)
 
        domain->id = vm_domid++;
        domain->nid = -1;
-       memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+       memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
        domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
 
        return domain;
index 9fb18c1..b280c43 100644 (file)
@@ -323,7 +323,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio,
         * Corrupt successful READs while in down state.
         * If flags were specified, only corrupt those that match.
         */
-       if (!error && bio_submitted_while_down &&
+       if (fc->corrupt_bio_byte && !error && bio_submitted_while_down &&
            (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) &&
            all_corrupt_bio_flags_match(bio, fc))
                corrupt_bio_data(bio, fc);
index ad2eba4..ea5dd28 100644 (file)
@@ -296,6 +296,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
        unsigned offset;
        unsigned num_bvecs;
        sector_t remaining = where->count;
+       struct request_queue *q = bdev_get_queue(where->bdev);
+       sector_t discard_sectors;
 
        /*
         * where->count may be zero if rw holds a flush and we need to
@@ -305,9 +307,12 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
                /*
                 * Allocate a suitably sized-bio.
                 */
-               num_bvecs = dm_sector_div_up(remaining,
-                                            (PAGE_SIZE >> SECTOR_SHIFT));
-               num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs);
+               if (rw & REQ_DISCARD)
+                       num_bvecs = 1;
+               else
+                       num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev),
+                                         dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
+
                bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
                bio->bi_sector = where->sector + (where->count - remaining);
                bio->bi_bdev = where->bdev;
@@ -315,10 +320,14 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
                bio->bi_destructor = dm_bio_destructor;
                store_io_and_region_in_bio(bio, io, region);
 
-               /*
-                * Try and add as many pages as possible.
-                */
-               while (remaining) {
+               if (rw & REQ_DISCARD) {
+                       discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
+                       bio->bi_size = discard_sectors << SECTOR_SHIFT;
+                       remaining -= discard_sectors;
+               } else while (remaining) {
+                       /*
+                        * Try and add as many pages as possible.
+                        */
                        dp->get_page(dp, &page, &len, &offset);
                        len = min(len, to_bytes(remaining));
                        if (!bio_add_page(bio, page, len, offset))
index 31c2dc2..1ce84ed 100644 (file)
@@ -1437,7 +1437,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
 
        if (!argc) {
                DMWARN("Empty message received.");
-               goto out;
+               goto out_argv;
        }
 
        table = dm_get_live_table(md);
index 86cb7e5..787022c 100644 (file)
@@ -668,7 +668,14 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
                return ret;
 
        sb = page_address(rdev->sb_page);
-       if (sb->magic != cpu_to_le32(DM_RAID_MAGIC)) {
+
+       /*
+        * Two cases that we want to write new superblocks and rebuild:
+        * 1) New device (no matching magic number)
+        * 2) Device specified for rebuild (!In_sync w/ offset == 0)
+        */
+       if ((sb->magic != cpu_to_le32(DM_RAID_MAGIC)) ||
+           (!test_bit(In_sync, &rdev->flags) && !rdev->recovery_offset)) {
                super_sync(rdev->mddev, rdev);
 
                set_bit(FirstUse, &rdev->flags);
@@ -745,11 +752,8 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev)
         */
        rdev_for_each(r, t, mddev) {
                if (!test_bit(In_sync, &r->flags)) {
-                       if (!test_bit(FirstUse, &r->flags))
-                               DMERR("Superblock area of "
-                                     "rebuild device %d should have been "
-                                     "cleared.", r->raid_disk);
-                       set_bit(FirstUse, &r->flags);
+                       DMINFO("Device %d specified for rebuild: "
+                              "Clearing superblock", r->raid_disk);
                        rebuilds++;
                } else if (test_bit(FirstUse, &r->flags))
                        new_devs++;
@@ -971,6 +975,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
        INIT_WORK(&rs->md.event_work, do_table_event);
        ti->private = rs;
+       ti->num_flush_requests = 1;
 
        mutex_lock(&rs->md.reconfig_mutex);
        ret = md_run(&rs->md);
index 59c4f04..237571a 100644 (file)
@@ -385,6 +385,7 @@ static int init_pmd(struct dm_pool_metadata *pmd,
                data_sm = dm_sm_disk_create(tm, nr_blocks);
                if (IS_ERR(data_sm)) {
                        DMERR("sm_disk_create failed");
+                       dm_tm_unlock(tm, sblock);
                        r = PTR_ERR(data_sm);
                        goto bad;
                }
@@ -789,6 +790,11 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
        return 0;
 }
 
+/*
+ * __open_device: Returns @td corresponding to device with id @dev,
+ * creating it if @create is set and incrementing @td->open_count.
+ * On failure, @td is undefined.
+ */
 static int __open_device(struct dm_pool_metadata *pmd,
                         dm_thin_id dev, int create,
                         struct dm_thin_device **td)
@@ -799,10 +805,16 @@ static int __open_device(struct dm_pool_metadata *pmd,
        struct disk_device_details details_le;
 
        /*
-        * Check the device isn't already open.
+        * If the device is already open, return it.
         */
        list_for_each_entry(td2, &pmd->thin_devices, list)
                if (td2->id == dev) {
+                       /*
+                        * May not create an already-open device.
+                        */
+                       if (create)
+                               return -EEXIST;
+
                        td2->open_count++;
                        *td = td2;
                        return 0;
@@ -817,6 +829,9 @@ static int __open_device(struct dm_pool_metadata *pmd,
                if (r != -ENODATA || !create)
                        return r;
 
+               /*
+                * Create new device.
+                */
                changed = 1;
                details_le.mapped_blocks = 0;
                details_le.transaction_id = cpu_to_le64(pmd->trans_id);
@@ -882,12 +897,10 @@ static int __create_thin(struct dm_pool_metadata *pmd,
 
        r = __open_device(pmd, dev, 1, &td);
        if (r) {
-               __close_device(td);
                dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
                dm_btree_del(&pmd->bl_info, dev_root);
                return r;
        }
-       td->changed = 1;
        __close_device(td);
 
        return r;
@@ -967,14 +980,14 @@ static int __create_snap(struct dm_pool_metadata *pmd,
                goto bad;
 
        r = __set_snapshot_details(pmd, td, origin, pmd->time);
+       __close_device(td);
+
        if (r)
                goto bad;
 
-       __close_device(td);
        return 0;
 
 bad:
-       __close_device(td);
        dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
        dm_btree_remove(&pmd->details_info, pmd->details_root,
                        &key, &pmd->details_root);
@@ -1211,6 +1224,8 @@ static int __remove(struct dm_thin_device *td, dm_block_t block)
        if (r)
                return r;
 
+       td->mapped_blocks--;
+       td->changed = 1;
        pmd->need_commit = 1;
 
        return 0;
index a368db2..a0b225e 100644 (file)
@@ -624,7 +624,7 @@ int md_raid1_congested(struct mddev *mddev, int bits)
                return 1;
 
        rcu_read_lock();
-       for (i = 0; i < conf->raid_disks; i++) {
+       for (i = 0; i < conf->raid_disks * 2; i++) {
                struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
                if (rdev && !test_bit(Faulty, &rdev->flags)) {
                        struct request_queue *q = bdev_get_queue(rdev->bdev);
index 6e8aa21..58c44d6 100644 (file)
@@ -67,6 +67,7 @@ static int max_queued_requests = 1024;
 
 static void allow_barrier(struct r10conf *conf);
 static void lower_barrier(struct r10conf *conf);
+static int enough(struct r10conf *conf, int ignore);
 
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
@@ -347,6 +348,19 @@ static void raid10_end_read_request(struct bio *bio, int error)
                 * wait for the 'master' bio.
                 */
                set_bit(R10BIO_Uptodate, &r10_bio->state);
+       } else {
+               /* If all other devices that store this block have
+                * failed, we want to return the error upwards rather
+                * than fail the last device.  Here we redefine
+                * "uptodate" to mean "Don't want to retry"
+                */
+               unsigned long flags;
+               spin_lock_irqsave(&conf->device_lock, flags);
+               if (!enough(conf, rdev->raid_disk))
+                       uptodate = 1;
+               spin_unlock_irqrestore(&conf->device_lock, flags);
+       }
+       if (uptodate) {
                raid_end_bio_io(r10_bio);
                rdev_dec_pending(rdev, conf->mddev);
        } else {
@@ -2052,6 +2066,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
                       "md/raid10:%s: %s: Failing raid device\n",
                       mdname(mddev), b);
                md_error(mddev, conf->mirrors[d].rdev);
+               r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
                return;
        }
 
@@ -2105,8 +2120,11 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
                                    rdev,
                                    r10_bio->devs[r10_bio->read_slot].addr
                                    + sect,
-                                   s, 0))
+                                   s, 0)) {
                                md_error(mddev, rdev);
+                               r10_bio->devs[r10_bio->read_slot].bio
+                                       = IO_BLOCKED;
+                       }
                        break;
                }
 
@@ -2299,17 +2317,20 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
         * This is all done synchronously while the array is
         * frozen.
         */
+       bio = r10_bio->devs[slot].bio;
+       bdevname(bio->bi_bdev, b);
+       bio_put(bio);
+       r10_bio->devs[slot].bio = NULL;
+
        if (mddev->ro == 0) {
                freeze_array(conf);
                fix_read_error(conf, mddev, r10_bio);
                unfreeze_array(conf);
-       }
+       } else
+               r10_bio->devs[slot].bio = IO_BLOCKED;
+
        rdev_dec_pending(rdev, mddev);
 
-       bio = r10_bio->devs[slot].bio;
-       bdevname(bio->bi_bdev, b);
-       r10_bio->devs[slot].bio =
-               mddev->ro ? IO_BLOCKED : NULL;
 read_more:
        rdev = read_balance(conf, r10_bio, &max_sectors);
        if (rdev == NULL) {
@@ -2318,13 +2339,10 @@ read_more:
                       mdname(mddev), b,
                       (unsigned long long)r10_bio->sector);
                raid_end_bio_io(r10_bio);
-               bio_put(bio);
                return;
        }
 
        do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
-       if (bio)
-               bio_put(bio);
        slot = r10_bio->read_slot;
        printk_ratelimited(
                KERN_ERR
@@ -2360,7 +2378,6 @@ read_more:
                        mbio->bi_phys_segments++;
                spin_unlock_irq(&conf->device_lock);
                generic_make_request(bio);
-               bio = NULL;
 
                r10_bio = mempool_alloc(conf->r10bio_pool,
                                        GFP_NOIO);
@@ -3243,7 +3260,6 @@ static int run(struct mddev *mddev)
                        disk->rdev = rdev;
                }
 
-               disk->rdev = rdev;
                disk_stack_limits(mddev->gendisk, rdev->bdev,
                                  rdev->data_offset << 9);
                /* as we don't honour merge_bvec_fn, we must never risk
index 654685c..aa77e54 100644 (file)
@@ -49,9 +49,6 @@ struct smsdvb_client_t {
 
        struct completion       tune_done;
 
-       /* todo: save freq/band instead whole struct */
-       struct dtv_frontend_properties fe_params;
-
        struct SMSHOSTLIB_STATISTICS_DVB_S sms_stat_dvb;
        int event_fe_state;
        int event_unc_state;
@@ -744,12 +741,124 @@ static int smsdvb_get_frontend(struct dvb_frontend *fe)
        struct dtv_frontend_properties *fep = &fe->dtv_property_cache;
        struct smsdvb_client_t *client =
                container_of(fe, struct smsdvb_client_t, frontend);
+       struct smscore_device_t *coredev = client->coredev;
+       struct TRANSMISSION_STATISTICS_S *td =
+               &client->sms_stat_dvb.TransmissionData;
 
-       sms_debug("");
+       switch (smscore_get_device_mode(coredev)) {
+       case DEVICE_MODE_DVBT:
+       case DEVICE_MODE_DVBT_BDA:
+               fep->frequency = td->Frequency;
+
+               switch (td->Bandwidth) {
+               case 6:
+                       fep->bandwidth_hz = 6000000;
+                       break;
+               case 7:
+                       fep->bandwidth_hz = 7000000;
+                       break;
+               case 8:
+                       fep->bandwidth_hz = 8000000;
+                       break;
+               }
+
+               switch (td->TransmissionMode) {
+               case 2:
+                       fep->transmission_mode = TRANSMISSION_MODE_2K;
+                       break;
+               case 8:
+                       fep->transmission_mode = TRANSMISSION_MODE_8K;
+               }
+
+               switch (td->GuardInterval) {
+               case 0:
+                       fep->guard_interval = GUARD_INTERVAL_1_32;
+                       break;
+               case 1:
+                       fep->guard_interval = GUARD_INTERVAL_1_16;
+                       break;
+               case 2:
+                       fep->guard_interval = GUARD_INTERVAL_1_8;
+                       break;
+               case 3:
+                       fep->guard_interval = GUARD_INTERVAL_1_4;
+                       break;
+               }
+
+               switch (td->CodeRate) {
+               case 0:
+                       fep->code_rate_HP = FEC_1_2;
+                       break;
+               case 1:
+                       fep->code_rate_HP = FEC_2_3;
+                       break;
+               case 2:
+                       fep->code_rate_HP = FEC_3_4;
+                       break;
+               case 3:
+                       fep->code_rate_HP = FEC_5_6;
+                       break;
+               case 4:
+                       fep->code_rate_HP = FEC_7_8;
+                       break;
+               }
+
+               switch (td->LPCodeRate) {
+               case 0:
+                       fep->code_rate_LP = FEC_1_2;
+                       break;
+               case 1:
+                       fep->code_rate_LP = FEC_2_3;
+                       break;
+               case 2:
+                       fep->code_rate_LP = FEC_3_4;
+                       break;
+               case 3:
+                       fep->code_rate_LP = FEC_5_6;
+                       break;
+               case 4:
+                       fep->code_rate_LP = FEC_7_8;
+                       break;
+               }
+
+               switch (td->Constellation) {
+               case 0:
+                       fep->modulation = QPSK;
+                       break;
+               case 1:
+                       fep->modulation = QAM_16;
+                       break;
+               case 2:
+                       fep->modulation = QAM_64;
+                       break;
+               }
+
+               switch (td->Hierarchy) {
+               case 0:
+                       fep->hierarchy = HIERARCHY_NONE;
+                       break;
+               case 1:
+                       fep->hierarchy = HIERARCHY_1;
+                       break;
+               case 2:
+                       fep->hierarchy = HIERARCHY_2;
+                       break;
+               case 3:
+                       fep->hierarchy = HIERARCHY_4;
+                       break;
+               }
 
-       /* todo: */
-       memcpy(fep, &client->fe_params,
-              sizeof(struct dtv_frontend_properties));
+               fep->inversion = INVERSION_AUTO;
+               break;
+       case DEVICE_MODE_ISDBT:
+       case DEVICE_MODE_ISDBT_BDA:
+               fep->frequency = td->Frequency;
+               fep->bandwidth_hz = 6000000;
+               /* todo: retrieve the other parameters */
+               break;
+       default:
+               return -EINVAL;
+       }
 
        return 0;
 }
@@ -872,11 +981,11 @@ static int smsdvb_hotplug(struct smscore_device_t *coredev,
        switch (smscore_get_device_mode(coredev)) {
        case DEVICE_MODE_DVBT:
        case DEVICE_MODE_DVBT_BDA:
-               smsdvb_fe_ops.delsys[0] = SYS_DVBT;
+               client->frontend.ops.delsys[0] = SYS_DVBT;
                break;
        case DEVICE_MODE_ISDBT:
        case DEVICE_MODE_ISDBT_BDA:
-               smsdvb_fe_ops.delsys[0] = SYS_ISDBT;
+               client->frontend.ops.delsys[0] = SYS_ISDBT;
                break;
        }
 
index 1e63852..5278fe7 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/videodev2.h>
 #include <linux/clk.h>
 #include <linux/err.h>
+#include <linux/module.h>
 
 #include <mach/mux.h>
 
index c7e69b8..4a44f9a 100644 (file)
@@ -611,9 +611,11 @@ void uvc_video_clock_update(struct uvc_streaming *stream,
        delta_stc = buf->pts - (1UL << 31);
        x1 = first->dev_stc - delta_stc;
        x2 = last->dev_stc - delta_stc;
+       if (x1 == x2)
+               goto done;
+
        y1 = (first->dev_sof + 2048) << 16;
        y2 = (last->dev_sof + 2048) << 16;
-
        if (y2 < y1)
                y2 += 2048 << 16;
 
@@ -631,14 +633,16 @@ void uvc_video_clock_update(struct uvc_streaming *stream,
                  x1, x2, y1, y2, clock->sof_offset);
 
        /* Second step, SOF to host clock conversion. */
-       ts = timespec_sub(last->host_ts, first->host_ts);
        x1 = (uvc_video_clock_host_sof(first) + 2048) << 16;
        x2 = (uvc_video_clock_host_sof(last) + 2048) << 16;
-       y1 = NSEC_PER_SEC;
-       y2 = (ts.tv_sec + 1) * NSEC_PER_SEC + ts.tv_nsec;
-
        if (x2 < x1)
                x2 += 2048 << 16;
+       if (x1 == x2)
+               goto done;
+
+       ts = timespec_sub(last->host_ts, first->host_ts);
+       y1 = NSEC_PER_SEC;
+       y2 = (ts.tv_sec + 1) * NSEC_PER_SEC + ts.tv_nsec;
 
        /* Interpolated and host SOF timestamps can wrap around at slightly
         * different times. Handle this by adding or removing 2048 to or from
index 53e2a80..d295941 100644 (file)
@@ -956,11 +956,12 @@ int __devinit ab8500_init(struct ab8500 *ab8500)
        return ret;
 
 out_freeirq:
-       if (ab8500->irq_base) {
+       if (ab8500->irq_base)
                free_irq(ab8500->irq, ab8500);
 out_removeirq:
+       if (ab8500->irq_base)
                ab8500_irq_remove(ab8500);
-       }
+
        return ret;
 }
 
index 0f59228..411f523 100644 (file)
@@ -123,7 +123,7 @@ static int mfd_add_device(struct device *parent, int id,
                }
 
                if (!cell->ignore_resource_conflicts) {
-                       ret = acpi_check_resource_conflict(res);
+                       ret = acpi_check_resource_conflict(&res[r]);
                        if (ret)
                                goto fail_res;
                }
index e075c11..caadabe 100644 (file)
@@ -105,7 +105,7 @@ static int s5m87xx_i2c_probe(struct i2c_client *i2c,
        s5m87xx->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR);
        i2c_set_clientdata(s5m87xx->rtc, s5m87xx);
 
-       if (pdata->cfg_pmic_irq)
+       if (pdata && pdata->cfg_pmic_irq)
                pdata->cfg_pmic_irq();
 
        s5m_irq_init(s5m87xx);
index 01cf501..4392f6b 100644 (file)
@@ -168,7 +168,7 @@ static int tps65910_i2c_probe(struct i2c_client *i2c,
                goto err;
 
        init_data->irq = pmic_plat_data->irq;
-       init_data->irq_base = pmic_plat_data->irq;
+       init_data->irq_base = pmic_plat_data->irq_base;
 
        tps65910_gpio_init(tps65910, pmic_plat_data->gpio_base);
 
index 5fec23a..74fd8cb 100644 (file)
@@ -151,7 +151,7 @@ int tps65912_device_init(struct tps65912 *tps65912)
                goto err;
 
        init_data->irq = pmic_plat_data->irq;
-       init_data->irq_base = pmic_plat_data->irq;
+       init_data->irq_base = pmic_plat_data->irq_base;
        ret = tps65912_irq_init(tps65912, init_data->irq, init_data);
        if (ret < 0)
                goto err;
index 8a1fafd..9fd01bf 100644 (file)
@@ -496,7 +496,6 @@ int wm8350_irq_init(struct wm8350 *wm8350, int irq,
 
        mutex_init(&wm8350->irq_lock);
        wm8350->chip_irq = irq;
-       wm8350->irq_base = pdata->irq_base;
 
        if (pdata && pdata->irq_base > 0)
                irq_base = pdata->irq_base;
index f117e7f..a04b3c1 100644 (file)
@@ -256,6 +256,20 @@ static int wm8994_suspend(struct device *dev)
                break;
        }
 
+       switch (wm8994->type) {
+       case WM1811:
+               ret = wm8994_reg_read(wm8994, WM8994_ANTIPOP_2);
+               if (ret < 0) {
+                       dev_err(dev, "Failed to read jackdet: %d\n", ret);
+               } else if (ret & WM1811_JACKDET_MODE_MASK) {
+                       dev_dbg(dev, "CODEC still active, ignoring suspend\n");
+                       return 0;
+               }
+               break;
+       default:
+               break;
+       }
+
        /* Disable LDO pulldowns while the device is suspended if we
         * don't know that something will be driving them. */
        if (!wm8994->ldo_ena_always_driven)
index c598ae6..bc0c509 100644 (file)
@@ -806,6 +806,7 @@ static bool wm1811_readable_register(struct device *dev, unsigned int reg)
        case WM8994_DC_SERVO_2:
        case WM8994_DC_SERVO_READBACK:
        case WM8994_DC_SERVO_4:
+       case WM8994_DC_SERVO_4E:
        case WM8994_ANALOGUE_HP_1:
        case WM8958_MIC_DETECT_1:
        case WM8958_MIC_DETECT_2:
index 83adab6..8208262 100644 (file)
@@ -113,17 +113,7 @@ static struct i2c_driver ad_dpot_i2c_driver = {
        .id_table       = ad_dpot_id,
 };
 
-static int __init ad_dpot_i2c_init(void)
-{
-       return i2c_add_driver(&ad_dpot_i2c_driver);
-}
-module_init(ad_dpot_i2c_init);
-
-static void __exit ad_dpot_i2c_exit(void)
-{
-       i2c_del_driver(&ad_dpot_i2c_driver);
-}
-module_exit(ad_dpot_i2c_exit);
+module_i2c_driver(ad_dpot_i2c_driver);
 
 MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
 MODULE_DESCRIPTION("digital potentiometer I2C bus driver");
index 822749e..f623175 100644 (file)
@@ -135,17 +135,7 @@ static struct spi_driver ad_dpot_spi_driver = {
        .id_table       = ad_dpot_spi_id,
 };
 
-static int __init ad_dpot_spi_init(void)
-{
-       return spi_register_driver(&ad_dpot_spi_driver);
-}
-module_init(ad_dpot_spi_init);
-
-static void __exit ad_dpot_spi_exit(void)
-{
-       spi_unregister_driver(&ad_dpot_spi_driver);
-}
-module_exit(ad_dpot_spi_exit);
+module_spi_driver(ad_dpot_spi_driver);
 
 MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
 MODULE_DESCRIPTION("digital potentiometer SPI bus driver");
index 81db781..0314773 100644 (file)
@@ -332,17 +332,7 @@ static struct i2c_driver apds9802als_driver = {
        .id_table = apds9802als_id,
 };
 
-static int __init sensor_apds9802als_init(void)
-{
-       return i2c_add_driver(&apds9802als_driver);
-}
-
-static void  __exit sensor_apds9802als_exit(void)
-{
-       i2c_del_driver(&apds9802als_driver);
-}
-module_init(sensor_apds9802als_init);
-module_exit(sensor_apds9802als_exit);
+module_i2c_driver(apds9802als_driver);
 
 MODULE_AUTHOR("Anantha Narayanan <Anantha.Narayanan@intel.com");
 MODULE_DESCRIPTION("Avago apds9802als ALS Driver");
index e2a52e5..ee74244 100644 (file)
@@ -1279,19 +1279,8 @@ static struct i2c_driver apds990x_driver = {
        .id_table = apds990x_id,
 };
 
-static int __init apds990x_init(void)
-{
-       return i2c_add_driver(&apds990x_driver);
-}
-
-static void __exit apds990x_exit(void)
-{
-       i2c_del_driver(&apds990x_driver);
-}
+module_i2c_driver(apds990x_driver);
 
 MODULE_DESCRIPTION("APDS990X combined ALS and proximity sensor");
 MODULE_AUTHOR("Samu Onkalo, Nokia Corporation");
 MODULE_LICENSE("GPL v2");
-
-module_init(apds990x_init);
-module_exit(apds990x_exit);
index d79a972..3d56ae7 100644 (file)
@@ -1399,19 +1399,8 @@ static struct i2c_driver bh1770_driver = {
        .id_table = bh1770_id,
 };
 
-static int __init bh1770_init(void)
-{
-       return i2c_add_driver(&bh1770_driver);
-}
-
-static void __exit bh1770_exit(void)
-{
-       i2c_del_driver(&bh1770_driver);
-}
+module_i2c_driver(bh1770_driver);
 
 MODULE_DESCRIPTION("BH1770GLC / SFH7770 combined ALS and proximity sensor");
 MODULE_AUTHOR("Samu Onkalo, Nokia Corporation");
 MODULE_LICENSE("GPL v2");
-
-module_init(bh1770_init);
-module_exit(bh1770_exit);
index bfeea9b..54f6f39 100644 (file)
@@ -253,21 +253,10 @@ static struct i2c_driver bh1780_driver = {
        .driver = {
                .name = "bh1780",
                .pm     = BH1780_PMOPS,
-},
+       },
 };
 
-static int __init bh1780_init(void)
-{
-       return i2c_add_driver(&bh1780_driver);
-}
-
-static void __exit bh1780_exit(void)
-{
-       i2c_del_driver(&bh1780_driver);
-}
-
-module_init(bh1780_init)
-module_exit(bh1780_exit)
+module_i2c_driver(bh1780_driver);
 
 MODULE_DESCRIPTION("BH1780GLI Ambient Light Sensor Driver");
 MODULE_LICENSE("GPL");
index b29a2be..76c3064 100644 (file)
@@ -87,7 +87,7 @@ struct bmp085_data {
        u32 raw_temperature;
        u32 raw_pressure;
        unsigned char oversampling_setting;
-       u32 last_temp_measurement;
+       unsigned long last_temp_measurement;
        s32 b6; /* calculated temperature correction coefficient */
 };
 
@@ -234,7 +234,8 @@ static s32 bmp085_get_pressure(struct bmp085_data *data, int *pressure)
        int status;
 
        /* alt least every second force an update of the ambient temperature */
-       if (data->last_temp_measurement + 1*HZ < jiffies) {
+       if (data->last_temp_measurement == 0 ||
+                       time_is_before_jiffies(data->last_temp_measurement + 1*HZ)) {
                status = bmp085_get_temperature(data, NULL);
                if (status != 0)
                        goto exit;
@@ -464,20 +465,8 @@ static struct i2c_driver bmp085_driver = {
        .address_list   = normal_i2c
 };
 
-static int __init bmp085_init(void)
-{
-       return i2c_add_driver(&bmp085_driver);
-}
-
-static void __exit bmp085_exit(void)
-{
-       i2c_del_driver(&bmp085_driver);
-}
-
+module_i2c_driver(bmp085_driver);
 
 MODULE_AUTHOR("Christoph Mair <christoph.mair@gmail.com");
 MODULE_DESCRIPTION("BMP085 driver");
 MODULE_LICENSE("GPL");
-
-module_init(bmp085_init);
-module_exit(bmp085_exit);
index 19fc7c1..f428d86 100644 (file)
@@ -984,9 +984,9 @@ static int __init c2port_init(void)
                " - (C) 2007 Rodolfo Giometti\n");
 
        c2port_class = class_create(THIS_MODULE, "c2port");
-       if (!c2port_class) {
+       if (IS_ERR(c2port_class)) {
                printk(KERN_ERR "c2port: failed to allocate class\n");
-               return -ENOMEM;
+               return PTR_ERR(c2port_class);
        }
        c2port_class->dev_attrs = c2port_attrs;
 
index 14e974b..366bc15 100644 (file)
@@ -1410,23 +1410,8 @@ static struct platform_driver data_of_driver = {
        },
 };
 
-/*
- * Module Init / Exit
- */
-
-static int __init data_init(void)
-{
-       return platform_driver_register(&data_of_driver);
-}
-
-static void __exit data_exit(void)
-{
-       platform_driver_unregister(&data_of_driver);
-}
+module_platform_driver(data_of_driver);
 
 MODULE_AUTHOR("Ira W. Snyder <iws@ovro.caltech.edu>");
 MODULE_DESCRIPTION("CARMA DATA-FPGA Access Driver");
 MODULE_LICENSE("GPL");
-
-module_init(data_init);
-module_exit(data_exit);
index 87a390d..f505a40 100644 (file)
@@ -246,7 +246,7 @@ EXPORT_SYMBOL_GPL(cs5535_mfgpt_write);
  * Jordan tells me that he and Mitch once played w/ it, but it's unclear
  * what the results of that were (and they experienced some instability).
  */
-static void __init reset_all_timers(void)
+static void __devinit reset_all_timers(void)
 {
        uint32_t val, dummy;
 
index a513f0a..154b02e 100644 (file)
@@ -250,19 +250,8 @@ static struct i2c_driver ds1682_driver = {
        .id_table = ds1682_id,
 };
 
-static int __init ds1682_init(void)
-{
-       return i2c_add_driver(&ds1682_driver);
-}
-
-static void __exit ds1682_exit(void)
-{
-       i2c_del_driver(&ds1682_driver);
-}
+module_i2c_driver(ds1682_driver);
 
 MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
 MODULE_DESCRIPTION("DS1682 Elapsed Time Indicator driver");
 MODULE_LICENSE("GPL");
-
-module_init(ds1682_init);
-module_exit(ds1682_exit);
index c627e41..01ab3c9 100644 (file)
@@ -405,17 +405,7 @@ static struct spi_driver at25_driver = {
        .remove         = __devexit_p(at25_remove),
 };
 
-static int __init at25_init(void)
-{
-       return spi_register_driver(&at25_driver);
-}
-module_init(at25_init);
-
-static void __exit at25_exit(void)
-{
-       spi_unregister_driver(&at25_driver);
-}
-module_exit(at25_exit);
+module_spi_driver(at25_driver);
 
 MODULE_DESCRIPTION("Driver for most SPI EEPROMs");
 MODULE_AUTHOR("David Brownell");
index 45060dd..c169e07 100644 (file)
@@ -229,22 +229,10 @@ static struct i2c_driver eeprom_driver = {
        .address_list   = normal_i2c,
 };
 
-static int __init eeprom_init(void)
-{
-       return i2c_add_driver(&eeprom_driver);
-}
-
-static void __exit eeprom_exit(void)
-{
-       i2c_del_driver(&eeprom_driver);
-}
-
+module_i2c_driver(eeprom_driver);
 
 MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl> and "
                "Philip Edelbrock <phil@netroedge.com> and "
                "Greg Kroah-Hartman <greg@kroah.com>");
 MODULE_DESCRIPTION("I2C EEPROM driver");
 MODULE_LICENSE("GPL");
-
-module_init(eeprom_init);
-module_exit(eeprom_exit);
index 0c7ebb1..ce3fe36 100644 (file)
@@ -392,17 +392,7 @@ static struct spi_driver eeprom_93xx46_driver = {
        .remove         = __devexit_p(eeprom_93xx46_remove),
 };
 
-static int __init eeprom_93xx46_init(void)
-{
-       return spi_register_driver(&eeprom_93xx46_driver);
-}
-module_init(eeprom_93xx46_init);
-
-static void __exit eeprom_93xx46_exit(void)
-{
-       spi_unregister_driver(&eeprom_93xx46_driver);
-}
-module_exit(eeprom_93xx46_exit);
+module_spi_driver(eeprom_93xx46_driver);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Driver for 93xx46 EEPROMs");
index 5653a3c..e36157d 100644 (file)
@@ -208,20 +208,8 @@ static struct i2c_driver max6875_driver = {
        .id_table       = max6875_id,
 };
 
-static int __init max6875_init(void)
-{
-       return i2c_add_driver(&max6875_driver);
-}
-
-static void __exit max6875_exit(void)
-{
-       i2c_del_driver(&max6875_driver);
-}
-
+module_i2c_driver(max6875_driver);
 
 MODULE_AUTHOR("Ben Gardner <bgardner@wabtec.com>");
 MODULE_DESCRIPTION("MAX6875 driver");
 MODULE_LICENSE("GPL");
-
-module_init(max6875_init);
-module_exit(max6875_exit);
index f6586d5..ac96c3a 100644 (file)
@@ -458,7 +458,6 @@ fail2:
        if (client->irq)
                free_irq(client->irq, usbsw);
 fail1:
-       i2c_set_clientdata(client, NULL);
        kfree(usbsw);
        return ret;
 }
@@ -468,7 +467,6 @@ static int __devexit fsa9480_remove(struct i2c_client *client)
        struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
        if (client->irq)
                free_irq(client->irq, usbsw);
-       i2c_set_clientdata(client, NULL);
 
        sysfs_remove_group(&client->dev.kobj, &fsa9480_group);
        device_init_wakeup(&client->dev, 0);
@@ -541,17 +539,7 @@ static struct i2c_driver fsa9480_i2c_driver = {
        .id_table = fsa9480_id,
 };
 
-static int __init fsa9480_init(void)
-{
-       return i2c_add_driver(&fsa9480_i2c_driver);
-}
-module_init(fsa9480_init);
-
-static void __exit fsa9480_exit(void)
-{
-       i2c_del_driver(&fsa9480_i2c_driver);
-}
-module_exit(fsa9480_exit);
+module_i2c_driver(fsa9480_i2c_driver);
 
 MODULE_AUTHOR("Minkyu Kang <mk7.kang@samsung.com>");
 MODULE_DESCRIPTION("FSA9480 USB Switch driver");
index ca938fc..423cd40 100644 (file)
@@ -148,18 +148,7 @@ static struct i2c_driver hmc6352_driver = {
        .id_table = hmc6352_id,
 };
 
-static int __init sensor_hmc6352_init(void)
-{
-       return i2c_add_driver(&hmc6352_driver);
-}
-
-static void  __exit sensor_hmc6352_exit(void)
-{
-       i2c_del_driver(&hmc6352_driver);
-}
-
-module_init(sensor_hmc6352_init);
-module_exit(sensor_hmc6352_exit);
+module_i2c_driver(hmc6352_driver);
 
 MODULE_AUTHOR("Kalhan Trisal <kalhan.trisal@intel.com");
 MODULE_DESCRIPTION("hmc6352 Compass Driver");
index 152e9d9..0029536 100644 (file)
@@ -480,23 +480,12 @@ static int ics932s401_remove(struct i2c_client *client)
        return 0;
 }
 
-static int __init ics932s401_init(void)
-{
-       return i2c_add_driver(&ics932s401_driver);
-}
-
-static void __exit ics932s401_exit(void)
-{
-       i2c_del_driver(&ics932s401_driver);
-}
+module_i2c_driver(ics932s401_driver);
 
 MODULE_AUTHOR("Darrick J. Wong <djwong@us.ibm.com>");
 MODULE_DESCRIPTION("ICS932S401 driver");
 MODULE_LICENSE("GPL");
 
-module_init(ics932s401_init);
-module_exit(ics932s401_exit);
-
 /* IBM IntelliStation Z30 */
 MODULE_ALIAS("dmi:bvnIBM:*:rn9228:*");
 MODULE_ALIAS("dmi:bvnIBM:*:rn9232:*");
index a71e245..eb5de2e 100644 (file)
@@ -455,21 +455,9 @@ static struct i2c_driver isl29003_driver = {
        .id_table = isl29003_id,
 };
 
-static int __init isl29003_init(void)
-{
-       return i2c_add_driver(&isl29003_driver);
-}
-
-static void __exit isl29003_exit(void)
-{
-       i2c_del_driver(&isl29003_driver);
-}
+module_i2c_driver(isl29003_driver);
 
 MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
 MODULE_DESCRIPTION("ISL29003 ambient light sensor driver");
 MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRIVER_VERSION);
-
-module_init(isl29003_init);
-module_exit(isl29003_exit);
-
index 3d6cce6..0aa08c7 100644 (file)
@@ -230,18 +230,7 @@ static struct i2c_driver isl29020_driver = {
        .id_table = isl29020_id,
 };
 
-static int __init sensor_isl29020_init(void)
-{
-       return i2c_add_driver(&isl29020_driver);
-}
-
-static void  __exit sensor_isl29020_exit(void)
-{
-       i2c_del_driver(&isl29020_driver);
-}
-
-module_init(sensor_isl29020_init);
-module_exit(sensor_isl29020_exit);
+module_i2c_driver(isl29020_driver);
 
 MODULE_AUTHOR("Kalhan Trisal <kalhan.trisal@intel.com>");
 MODULE_DESCRIPTION("Intersil isl29020 ALS Driver");
index c02fea0..e8c0019 100644 (file)
@@ -256,19 +256,8 @@ static struct i2c_driver lis3lv02d_i2c_driver = {
        .id_table = lis3lv02d_id,
 };
 
-static int __init lis3lv02d_init(void)
-{
-       return i2c_add_driver(&lis3lv02d_i2c_driver);
-}
-
-static void __exit lis3lv02d_exit(void)
-{
-       i2c_del_driver(&lis3lv02d_i2c_driver);
-}
+module_i2c_driver(lis3lv02d_i2c_driver);
 
 MODULE_AUTHOR("Nokia Corporation");
 MODULE_DESCRIPTION("lis3lv02d I2C interface");
 MODULE_LICENSE("GPL");
-
-module_init(lis3lv02d_init);
-module_exit(lis3lv02d_exit);
index b2c1be1..80880e9 100644 (file)
@@ -126,18 +126,7 @@ static struct spi_driver lis302dl_spi_driver = {
        .remove = __devexit_p(lis302dl_spi_remove),
 };
 
-static int __init lis302dl_init(void)
-{
-       return spi_register_driver(&lis302dl_spi_driver);
-}
-
-static void __exit lis302dl_exit(void)
-{
-       spi_unregister_driver(&lis302dl_spi_driver);
-}
-
-module_init(lis302dl_init);
-module_exit(lis302dl_exit);
+module_spi_driver(lis302dl_spi_driver);
 
 MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
 MODULE_DESCRIPTION("lis3lv02d SPI glue layer");
index d74ef41..19591ea 100644 (file)
@@ -488,17 +488,7 @@ static struct platform_driver max8997_muic_driver = {
        .remove         = __devexit_p(max8997_muic_remove),
 };
 
-static int __init max8997_muic_init(void)
-{
-       return platform_driver_register(&max8997_muic_driver);
-}
-module_init(max8997_muic_init);
-
-static void __exit max8997_muic_exit(void)
-{
-       platform_driver_unregister(&max8997_muic_driver);
-}
-module_exit(max8997_muic_exit);
+module_platform_driver(max8997_muic_driver);
 
 MODULE_DESCRIPTION("Maxim MAX8997 MUIC driver");
 MODULE_AUTHOR("Donggeun Kim <dg77.kim@samsung.com>");
index 43d073b..123ed98 100644 (file)
@@ -891,17 +891,7 @@ static struct platform_driver spear_pcie_gadget_driver = {
        },
 };
 
-static int __init spear_pcie_gadget_init(void)
-{
-       return platform_driver_register(&spear_pcie_gadget_driver);
-}
-module_init(spear_pcie_gadget_init);
-
-static void __exit spear_pcie_gadget_exit(void)
-{
-       platform_driver_unregister(&spear_pcie_gadget_driver);
-}
-module_exit(spear_pcie_gadget_exit);
+module_platform_driver(spear_pcie_gadget_driver);
 
 MODULE_ALIAS("platform:pcie-gadget-spear");
 MODULE_AUTHOR("Pratyush Anand");
index a7a861c..7c14f8f 100644 (file)
@@ -837,19 +837,8 @@ static struct platform_driver kim_platform_driver = {
        },
 };
 
-static int __init st_kim_init(void)
-{
-       return platform_driver_register(&kim_platform_driver);
-}
-
-static void __exit st_kim_deinit(void)
-{
-       platform_driver_unregister(&kim_platform_driver);
-}
-
+module_platform_driver(kim_platform_driver);
 
-module_init(st_kim_init);
-module_exit(st_kim_deinit);
 MODULE_AUTHOR("Pavan Savoy <pavan_savoy@ti.com>");
 MODULE_DESCRIPTION("Shared Transport Driver for TI BT/FM/GPS combo chips ");
 MODULE_LICENSE("GPL");
index d3f229a..5acbba1 100644 (file)
@@ -82,20 +82,9 @@ static struct spi_driver dac7512_driver = {
        .remove = __devexit_p(dac7512_remove),
 };
 
-static int __init dac7512_init(void)
-{
-       return spi_register_driver(&dac7512_driver);
-}
-
-static void __exit dac7512_exit(void)
-{
-       spi_unregister_driver(&dac7512_driver);
-}
+module_spi_driver(dac7512_driver);
 
 MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
 MODULE_DESCRIPTION("DAC7512 16-bit DAC");
 MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRIVER_VERSION);
-
-module_init(dac7512_init);
-module_exit(dac7512_exit);
index 483ae5f..0beb298 100644 (file)
@@ -454,20 +454,9 @@ static struct i2c_driver tsl2550_driver = {
        .id_table = tsl2550_id,
 };
 
-static int __init tsl2550_init(void)
-{
-       return i2c_add_driver(&tsl2550_driver);
-}
-
-static void __exit tsl2550_exit(void)
-{
-       i2c_del_driver(&tsl2550_driver);
-}
+module_i2c_driver(tsl2550_driver);
 
 MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
 MODULE_DESCRIPTION("TSL2550 ambient light sensor driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRIVER_VERSION);
-
-module_init(tsl2550_init);
-module_exit(tsl2550_exit);
index 690255c..132378b 100644 (file)
@@ -2068,6 +2068,9 @@ static int mmc_rescan_try_freq(struct mmc_host *host, unsigned freq)
         */
        mmc_hw_reset_for_init(host);
 
+       /* Initialization should be done at 3.3 V I/O voltage. */
+       mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330, 0);
+
        /*
         * sdio_reset sends CMD52 to reset card.  Since we do not know
         * if the card is being re-initialized, just send it.  CMD52
index 30055f2..c3704e2 100644 (file)
@@ -238,10 +238,10 @@ static inline void mmc_host_clk_init(struct mmc_host *host)
        /* Hold MCI clock for 8 cycles by default */
        host->clk_delay = 8;
        /*
-        * Default clock gating delay is 200ms.
+        * Default clock gating delay is 0ms to avoid wasting power.
         * This value can be tuned by writing into sysfs entry.
         */
-       host->clkgate_delay = 200;
+       host->clkgate_delay = 0;
        host->clk_gated = false;
        INIT_DELAYED_WORK(&host->clk_gate_work, mmc_host_clk_gate_work);
        spin_lock_init(&host->clk_lock);
index a480663..2b9ed14 100644 (file)
@@ -816,6 +816,9 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
        if (!mmc_host_is_spi(host))
                mmc_set_bus_mode(host, MMC_BUSMODE_OPENDRAIN);
 
+       /* Initialization should be done at 3.3 V I/O voltage. */
+       mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330, 0);
+
        /*
         * Since we're changing the OCR value, we seem to
         * need to tell some cards to go back to the idle
index 5017f93..c272c68 100644 (file)
@@ -911,6 +911,9 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
        BUG_ON(!host);
        WARN_ON(!host->claimed);
 
+       /* The initialization should be done at 3.3 V I/O voltage. */
+       mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330, 0);
+
        err = mmc_sd_get_cid(host, ocr, cid, &rocr);
        if (err)
                return err;
@@ -1156,11 +1159,6 @@ int mmc_attach_sd(struct mmc_host *host)
        BUG_ON(!host);
        WARN_ON(!host->claimed);
 
-       /* Make sure we are at 3.3V signalling voltage */
-       err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330, false);
-       if (err)
-               return err;
-
        /* Disable preset value enable if already set since last time */
        if (host->ops->enable_preset_value) {
                mmc_host_clk_hold(host);
index 12cde6e..2c7c83f 100644 (file)
@@ -585,6 +585,9 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
         * Inform the card of the voltage
         */
        if (!powered_resume) {
+               /* The initialization should be done at 3.3 V I/O voltage. */
+               mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330, 0);
+
                err = mmc_send_io_op_cond(host, host->ocr, &ocr);
                if (err)
                        goto err;
@@ -996,6 +999,11 @@ static int mmc_sdio_power_restore(struct mmc_host *host)
         * With these steps taken, mmc_select_voltage() is also required to
         * restore the correct voltage setting of the card.
         */
+
+       /* The initialization should be done at 3.3 V I/O voltage. */
+       if (!mmc_card_keep_power(host))
+               mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330, 0);
+
        sdio_reset(host);
        mmc_go_idle(host);
        mmc_send_if_cond(host, host->ocr_avail);
index 6985cdb..e4449a5 100644 (file)
@@ -1948,12 +1948,12 @@ static bool atmci_filter(struct dma_chan *chan, void *slave)
        }
 }
 
-static void atmci_configure_dma(struct atmel_mci *host)
+static bool atmci_configure_dma(struct atmel_mci *host)
 {
        struct mci_platform_data        *pdata;
 
        if (host == NULL)
-               return;
+               return false;
 
        pdata = host->pdev->dev.platform_data;
 
@@ -1970,12 +1970,15 @@ static void atmci_configure_dma(struct atmel_mci *host)
                host->dma.chan =
                        dma_request_channel(mask, atmci_filter, pdata->dma_slave);
        }
-       if (!host->dma.chan)
-               dev_notice(&host->pdev->dev, "DMA not available, using PIO\n");
-       else
+       if (!host->dma.chan) {
+               dev_warn(&host->pdev->dev, "no DMA channel available\n");
+               return false;
+       } else {
                dev_info(&host->pdev->dev,
                                        "Using %s for DMA transfers\n",
                                        dma_chan_name(host->dma.chan));
+               return true;
+       }
 }
 
 static inline unsigned int atmci_get_version(struct atmel_mci *host)
@@ -2085,8 +2088,7 @@ static int __init atmci_probe(struct platform_device *pdev)
 
        /* Get MCI capabilities and set operations according to it */
        atmci_get_cap(host);
-       if (host->caps.has_dma) {
-               dev_info(&pdev->dev, "using DMA\n");
+       if (host->caps.has_dma && atmci_configure_dma(host)) {
                host->prepare_data = &atmci_prepare_data_dma;
                host->submit_data = &atmci_submit_data_dma;
                host->stop_transfer = &atmci_stop_transfer_dma;
@@ -2096,15 +2098,12 @@ static int __init atmci_probe(struct platform_device *pdev)
                host->submit_data = &atmci_submit_data_pdc;
                host->stop_transfer = &atmci_stop_transfer_pdc;
        } else {
-               dev_info(&pdev->dev, "no DMA, no PDC\n");
+               dev_info(&pdev->dev, "using PIO\n");
                host->prepare_data = &atmci_prepare_data;
                host->submit_data = &atmci_submit_data;
                host->stop_transfer = &atmci_stop_transfer;
        }
 
-       if (host->caps.has_dma)
-               atmci_configure_dma(host);
-
        platform_set_drvdata(pdev, host);
 
        /* We need at least one slot to succeed */
index 0d955ff..11e589c 100644 (file)
@@ -1271,12 +1271,13 @@ static int __devinit mmci_probe(struct amba_device *dev,
        /*
         * Block size can be up to 2048 bytes, but must be a power of two.
         */
-       mmc->max_blk_size = 2048;
+       mmc->max_blk_size = 1 << 11;
 
        /*
-        * No limit on the number of blocks transferred.
+        * Limit the number of blocks transferred so that we don't overflow
+        * the maximum request size.
         */
-       mmc->max_blk_count = mmc->max_req_size;
+       mmc->max_blk_count = mmc->max_req_size >> 11;
 
        spin_lock_init(&host->lock);
 
index d601e41..0be4e20 100644 (file)
@@ -269,8 +269,9 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg)
                imx_data->scratchpad = val;
                return;
        case SDHCI_COMMAND:
-               if ((host->cmd->opcode == MMC_STOP_TRANSMISSION)
-                       && (imx_data->flags & ESDHC_FLAG_MULTIBLK_NO_INT))
+               if ((host->cmd->opcode == MMC_STOP_TRANSMISSION ||
+                    host->cmd->opcode == MMC_SET_BLOCK_COUNT) &&
+                   (imx_data->flags & ESDHC_FLAG_MULTIBLK_NO_INT))
                        val |= SDHCI_CMD_ABORTCMD;
 
                if (is_imx6q_usdhc(imx_data)) {
index 0a4fc62..c998e1a 100644 (file)
@@ -978,7 +978,7 @@ static void cfhsi_setup(struct net_device *dev)
        dev->netdev_ops = &cfhsi_ops;
        dev->type = ARPHRD_CAIF;
        dev->flags = IFF_POINTOPOINT | IFF_NOARP;
-       dev->mtu = CFHSI_MAX_PAYLOAD_SZ;
+       dev->mtu = CFHSI_MAX_CAIF_FRAME_SZ;
        dev->tx_queue_len = 0;
        dev->destructor = free_netdev;
        skb_queue_head_init(&cfhsi->qhead);
index 1ff3c6d..47a9bb2 100644 (file)
@@ -1710,7 +1710,7 @@ static irqreturn_t atl1c_intr(int irq, void *data)
                                        "atl1c hardware error (status = 0x%x)\n",
                                        status & ISR_ERROR);
                        /* reset MAC */
-                       adapter->work_event |= ATL1C_WORK_EVENT_RESET;
+                       set_bit(ATL1C_WORK_EVENT_RESET, &adapter->work_event);
                        schedule_work(&adapter->common_task);
                        return IRQ_HANDLED;
                }
index 7aee469..99389c8 100644 (file)
@@ -1934,7 +1934,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
        }
 
        if (bp->port.pmf)
-               bnx2x_update_drv_flags(bp, DRV_FLAGS_DCB_CONFIGURED, 0);
+               bnx2x_update_drv_flags(bp, 1 << DRV_FLAGS_DCB_CONFIGURED, 0);
        else
                bnx2x__link_status_update(bp);
 
index bf27c54..4f40f7d 100644 (file)
@@ -1179,10 +1179,16 @@ static inline int bnx2x_alloc_rx_bds(struct bnx2x_fastpath *fp,
  */
 static inline u8 bnx2x_stats_id(struct bnx2x_fastpath *fp)
 {
-       if (!CHIP_IS_E1x(fp->bp))
+       struct bnx2x *bp = fp->bp;
+       if (!CHIP_IS_E1x(bp)) {
+#ifdef BCM_CNIC
+               /* there are special statistics counters for FCoE 136..140 */
+               if (IS_FCOE_FP(fp))
+                       return bp->cnic_base_cl_id + (bp->pf_num >> 1);
+#endif
                return fp->cl_id;
-       else
-               return fp->cl_id + BP_PORT(fp->bp) * FP_SB_MAX_E1x;
+       }
+       return fp->cl_id + BP_PORT(bp) * FP_SB_MAX_E1x;
 }
 
 static inline void bnx2x_init_vlan_mac_fp_objs(struct bnx2x_fastpath *fp,
index 5051cf3..6d82ade 100644 (file)
@@ -735,7 +735,9 @@ void bnx2x_dcbx_set_params(struct bnx2x *bp, u32 state)
                                                 bp->dcbx_error);
 
                        /* mark DCBX result for PMF migration */
-                       bnx2x_update_drv_flags(bp, DRV_FLAGS_DCB_CONFIGURED, 1);
+                       bnx2x_update_drv_flags(bp,
+                                              1 << DRV_FLAGS_DCB_CONFIGURED,
+                                              1);
 #ifdef BCM_DCBNL
                        /*
                         * Add new app tlvs to dcbnl
@@ -1020,7 +1022,7 @@ void bnx2x_dcbx_init(struct bnx2x *bp)
                DP(NETIF_MSG_LINK, "dcbx_lldp_params_offset 0x%x\n",
                   dcbx_lldp_params_offset);
 
-               bnx2x_update_drv_flags(bp, DRV_FLAGS_DCB_CONFIGURED, 0);
+               bnx2x_update_drv_flags(bp, 1 << DRV_FLAGS_DCB_CONFIGURED, 0);
 
                if (SHMEM_LLDP_DCBX_PARAMS_NONE != dcbx_lldp_params_offset) {
                        bnx2x_dcbx_admin_mib_updated_params(bp,
@@ -1857,7 +1859,7 @@ void bnx2x_dcbx_pmf_update(struct bnx2x *bp)
         * read it from shmem and update bp and netdev accordingly
         */
        if (SHMEM2_HAS(bp, drv_flags) &&
-          GET_FLAGS(SHMEM2_RD(bp, drv_flags), DRV_FLAGS_DCB_CONFIGURED)) {
+          GET_FLAGS(SHMEM2_RD(bp, drv_flags), 1 << DRV_FLAGS_DCB_CONFIGURED)) {
                /* Read neg results if dcbx is in the FW */
                if (bnx2x_dcbx_read_shmem_neg_results(bp))
                        return;
index 2545213..b69f876 100644 (file)
@@ -10824,38 +10824,36 @@ do {                                                                  \
 
 int bnx2x_init_firmware(struct bnx2x *bp)
 {
+       const char *fw_file_name;
        struct bnx2x_fw_file_hdr *fw_hdr;
        int rc;
 
+       if (bp->firmware)
+               return 0;
 
-       if (!bp->firmware) {
-               const char *fw_file_name;
-
-               if (CHIP_IS_E1(bp))
-                       fw_file_name = FW_FILE_NAME_E1;
-               else if (CHIP_IS_E1H(bp))
-                       fw_file_name = FW_FILE_NAME_E1H;
-               else if (!CHIP_IS_E1x(bp))
-                       fw_file_name = FW_FILE_NAME_E2;
-               else {
-                       BNX2X_ERR("Unsupported chip revision\n");
-                       return -EINVAL;
-               }
-               BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
+       if (CHIP_IS_E1(bp))
+               fw_file_name = FW_FILE_NAME_E1;
+       else if (CHIP_IS_E1H(bp))
+               fw_file_name = FW_FILE_NAME_E1H;
+       else if (!CHIP_IS_E1x(bp))
+               fw_file_name = FW_FILE_NAME_E2;
+       else {
+               BNX2X_ERR("Unsupported chip revision\n");
+               return -EINVAL;
+       }
+       BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
 
-               rc = request_firmware(&bp->firmware, fw_file_name,
-                                     &bp->pdev->dev);
-               if (rc) {
-                       BNX2X_ERR("Can't load firmware file %s\n",
-                                 fw_file_name);
-                       goto request_firmware_exit;
-               }
+       rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev);
+       if (rc) {
+               BNX2X_ERR("Can't load firmware file %s\n",
+                         fw_file_name);
+               goto request_firmware_exit;
+       }
 
-               rc = bnx2x_check_firmware(bp);
-               if (rc) {
-                       BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name);
-                       goto request_firmware_exit;
-               }
+       rc = bnx2x_check_firmware(bp);
+       if (rc) {
+               BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name);
+               goto request_firmware_exit;
        }
 
        fw_hdr = (struct bnx2x_fw_file_hdr *)bp->firmware->data;
@@ -10901,6 +10899,7 @@ init_ops_alloc_err:
        kfree(bp->init_data);
 request_firmware_exit:
        release_firmware(bp->firmware);
+       bp->firmware = NULL;
 
        return rc;
 }
index cb6339c..94110e9 100644 (file)
@@ -5601,7 +5601,7 @@ static inline int bnx2x_func_send_start(struct bnx2x *bp,
 
        /* Fill the ramrod data with provided parameters */
        rdata->function_mode = cpu_to_le16(start_params->mf_mode);
-       rdata->sd_vlan_tag   = start_params->sd_vlan_tag;
+       rdata->sd_vlan_tag   = cpu_to_le16(start_params->sd_vlan_tag);
        rdata->path_id       = BP_PATH(bp);
        rdata->network_cos_mode = start_params->network_cos_mode;
 
index 1adef26..a766b25 100644 (file)
@@ -554,23 +554,11 @@ static void bnx2x_bmac_stats_update(struct bnx2x *bp)
                UPDATE_STAT64(tx_stat_gtufl, tx_stat_mac_ufl);
 
                /* collect PFC stats */
-               DIFF_64(diff.hi, new->tx_stat_gtpp_hi,
-                       pstats->pfc_frames_tx_hi,
-                       diff.lo, new->tx_stat_gtpp_lo,
-                       pstats->pfc_frames_tx_lo);
                pstats->pfc_frames_tx_hi = new->tx_stat_gtpp_hi;
                pstats->pfc_frames_tx_lo = new->tx_stat_gtpp_lo;
-               ADD_64(pstats->pfc_frames_tx_hi, diff.hi,
-                       pstats->pfc_frames_tx_lo, diff.lo);
 
-               DIFF_64(diff.hi, new->rx_stat_grpp_hi,
-                       pstats->pfc_frames_rx_hi,
-                       diff.lo, new->rx_stat_grpp_lo,
-                       pstats->pfc_frames_rx_lo);
                pstats->pfc_frames_rx_hi = new->rx_stat_grpp_hi;
                pstats->pfc_frames_rx_lo = new->rx_stat_grpp_lo;
-               ADD_64(pstats->pfc_frames_rx_hi, diff.hi,
-                       pstats->pfc_frames_rx_lo, diff.lo);
        }
 
        estats->pause_frames_received_hi =
index a1f2e0f..35c2a20 100644 (file)
@@ -5352,7 +5352,7 @@ static void tg3_tx(struct tg3_napi *tnapi)
                }
        }
 
-       netdev_completed_queue(tp->dev, pkts_compl, bytes_compl);
+       netdev_tx_completed_queue(txq, pkts_compl, bytes_compl);
 
        tnapi->tx_cons = sw_idx;
 
@@ -6793,7 +6793,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
        }
 
        skb_tx_timestamp(skb);
-       netdev_sent_queue(tp->dev, skb->len);
+       netdev_tx_sent_queue(txq, skb->len);
 
        /* Packets are ready, update Tx producer idx local and on card. */
        tw32_tx_mbox(tnapi->prodmbox, entry);
@@ -7275,8 +7275,8 @@ static void tg3_free_rings(struct tg3 *tp)
 
                        dev_kfree_skb_any(skb);
                }
+               netdev_tx_reset_queue(netdev_get_tx_queue(tp->dev, j));
        }
-       netdev_reset_queue(tp->dev);
 }
 
 /* Initialize tx/rx rings for packet processing.
@@ -7886,10 +7886,8 @@ static int tg3_chip_reset(struct tg3 *tp)
        return 0;
 }
 
-static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *,
-                                                struct rtnl_link_stats64 *);
-static struct tg3_ethtool_stats *tg3_get_estats(struct tg3 *,
-                                               struct tg3_ethtool_stats *);
+static void tg3_get_nstats(struct tg3 *, struct rtnl_link_stats64 *);
+static void tg3_get_estats(struct tg3 *, struct tg3_ethtool_stats *);
 
 /* tp->lock is held. */
 static int tg3_halt(struct tg3 *tp, int kind, int silent)
@@ -7910,7 +7908,7 @@ static int tg3_halt(struct tg3 *tp, int kind, int silent)
 
        if (tp->hw_stats) {
                /* Save the stats across chip resets... */
-               tg3_get_stats64(tp->dev, &tp->net_stats_prev),
+               tg3_get_nstats(tp, &tp->net_stats_prev),
                tg3_get_estats(tp, &tp->estats_prev);
 
                /* And make sure the next sample is new data */
@@ -9847,7 +9845,7 @@ static inline u64 get_stat64(tg3_stat64_t *val)
        return ((u64)val->high << 32) | ((u64)val->low);
 }
 
-static u64 calc_crc_errors(struct tg3 *tp)
+static u64 tg3_calc_crc_errors(struct tg3 *tp)
 {
        struct tg3_hw_stats *hw_stats = tp->hw_stats;
 
@@ -9856,14 +9854,12 @@ static u64 calc_crc_errors(struct tg3 *tp)
             GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701)) {
                u32 val;
 
-               spin_lock_bh(&tp->lock);
                if (!tg3_readphy(tp, MII_TG3_TEST1, &val)) {
                        tg3_writephy(tp, MII_TG3_TEST1,
                                     val | MII_TG3_TEST1_CRC_EN);
                        tg3_readphy(tp, MII_TG3_RXR_COUNTERS, &val);
                } else
                        val = 0;
-               spin_unlock_bh(&tp->lock);
 
                tp->phy_crc_errors += val;
 
@@ -9877,14 +9873,13 @@ static u64 calc_crc_errors(struct tg3 *tp)
        estats->member =        old_estats->member + \
                                get_stat64(&hw_stats->member)
 
-static struct tg3_ethtool_stats *tg3_get_estats(struct tg3 *tp,
-                                              struct tg3_ethtool_stats *estats)
+static void tg3_get_estats(struct tg3 *tp, struct tg3_ethtool_stats *estats)
 {
        struct tg3_ethtool_stats *old_estats = &tp->estats_prev;
        struct tg3_hw_stats *hw_stats = tp->hw_stats;
 
        if (!hw_stats)
-               return old_estats;
+               return;
 
        ESTAT_ADD(rx_octets);
        ESTAT_ADD(rx_fragments);
@@ -9963,20 +9958,13 @@ static struct tg3_ethtool_stats *tg3_get_estats(struct tg3 *tp,
        ESTAT_ADD(nic_tx_threshold_hit);
 
        ESTAT_ADD(mbuf_lwm_thresh_hit);
-
-       return estats;
 }
 
-static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
-                                                struct rtnl_link_stats64 *stats)
+static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats)
 {
-       struct tg3 *tp = netdev_priv(dev);
        struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev;
        struct tg3_hw_stats *hw_stats = tp->hw_stats;
 
-       if (!hw_stats)
-               return old_stats;
-
        stats->rx_packets = old_stats->rx_packets +
                get_stat64(&hw_stats->rx_ucast_packets) +
                get_stat64(&hw_stats->rx_mcast_packets) +
@@ -10019,15 +10007,13 @@ static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
                get_stat64(&hw_stats->tx_carrier_sense_errors);
 
        stats->rx_crc_errors = old_stats->rx_crc_errors +
-               calc_crc_errors(tp);
+               tg3_calc_crc_errors(tp);
 
        stats->rx_missed_errors = old_stats->rx_missed_errors +
                get_stat64(&hw_stats->rx_discards);
 
        stats->rx_dropped = tp->rx_dropped;
        stats->tx_dropped = tp->tx_dropped;
-
-       return stats;
 }
 
 static inline u32 calc_crc(unsigned char *buf, int len)
@@ -15409,6 +15395,21 @@ static void __devinit tg3_init_coal(struct tg3 *tp)
        }
 }
 
+static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
+                                               struct rtnl_link_stats64 *stats)
+{
+       struct tg3 *tp = netdev_priv(dev);
+
+       if (!tp->hw_stats)
+               return &tp->net_stats_prev;
+
+       spin_lock_bh(&tp->lock);
+       tg3_get_nstats(tp, stats);
+       spin_unlock_bh(&tp->lock);
+
+       return stats;
+}
+
 static const struct net_device_ops tg3_netdev_ops = {
        .ndo_open               = tg3_open,
        .ndo_stop               = tg3_close,
index e83d12c..9d76e59 100644 (file)
@@ -196,6 +196,8 @@ static DEFINE_PCI_DEVICE_TABLE(cxgb4_pci_tbl) = {
        CH_DEVICE(0x4408, 4),
        CH_DEVICE(0x4409, 4),
        CH_DEVICE(0x440a, 4),
+       CH_DEVICE(0x440d, 4),
+       CH_DEVICE(0x440e, 4),
        { 0, }
 };
 
index e53365a..d963c1d 100644 (file)
@@ -2892,6 +2892,8 @@ static struct pci_device_id cxgb4vf_pci_tbl[] = {
        CH_DEVICE(0x4808, 0),   /* T420-cx */
        CH_DEVICE(0x4809, 0),   /* T420-bt */
        CH_DEVICE(0x480a, 0),   /* T404-bt */
+       CH_DEVICE(0x480d, 0),   /* T480-cr */
+       CH_DEVICE(0x480e, 0),   /* T440-lp-cr */
        { 0, }
 };
 
index ee93a20..c52295c 100644 (file)
@@ -94,7 +94,7 @@ struct enic {
        u32 rx_coalesce_usecs;
        u32 tx_coalesce_usecs;
 #ifdef CONFIG_PCI_IOV
-       u32 num_vfs;
+       u16 num_vfs;
 #endif
        struct enic_port_profile *pp;
 
index ab3f67f..0e4edd3 100644 (file)
@@ -2370,7 +2370,7 @@ static int __devinit enic_probe(struct pci_dev *pdev,
        pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
        if (pos) {
                pci_read_config_word(pdev, pos + PCI_SRIOV_TOTAL_VF,
-                       (u16 *)&enic->num_vfs);
+                       &enic->num_vfs);
                if (enic->num_vfs) {
                        err = pci_enable_sriov(pdev, enic->num_vfs);
                        if (err) {
index 5d5fb26..e6893cd 100644 (file)
@@ -336,7 +336,9 @@ static struct rtnl_link_stats64 *ehea_get_stats64(struct net_device *dev,
        stats->tx_bytes = tx_bytes;
        stats->rx_packets = rx_packets;
 
-       return &port->stats;
+       stats->multicast = port->stats.multicast;
+       stats->rx_errors = port->stats.rx_errors;
+       return stats;
 }
 
 static void ehea_update_stats(struct work_struct *work)
index 738f950..fb2b367 100644 (file)
@@ -151,11 +151,6 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
                context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
        }
 
-       port = ((context->pri_path.sched_queue >> 6) & 1) + 1;
-       if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
-               context->pri_path.sched_queue = (context->pri_path.sched_queue &
-                                               0xc3);
-
        *(__be32 *) mailbox->buf = cpu_to_be32(optpar);
        memcpy(mailbox->buf + 8, context, sizeof *context);
 
index bfdb7af..8752e6e 100644 (file)
@@ -2255,8 +2255,7 @@ int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave,
 
        if (vhcr->op_modifier == 0) {
                err = handle_resize(dev, slave, vhcr, inbox, outbox, cmd, cq);
-               if (err)
-                       goto ex_put;
+               goto ex_put;
        }
 
        err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
index 9cb5f91..29e23be 100644 (file)
@@ -321,10 +321,10 @@ static void pch_gbe_check_copper_options(struct pch_gbe_adapter *adapter)
                        pr_debug("AutoNeg specified along with Speed or Duplex, AutoNeg parameter ignored\n");
                        hw->phy.autoneg_advertised = opt.def;
                } else {
-                       hw->phy.autoneg_advertised = AutoNeg;
-                       pch_gbe_validate_option(
-                               (int *)(&hw->phy.autoneg_advertised),
-                               &opt, adapter);
+                       int tmp = AutoNeg;
+
+                       pch_gbe_validate_option(&tmp, &opt, adapter);
+                       hw->phy.autoneg_advertised = tmp;
                }
        }
 
@@ -495,9 +495,10 @@ void pch_gbe_check_options(struct pch_gbe_adapter *adapter)
                        .arg  = { .l = { .nr = (int)ARRAY_SIZE(fc_list),
                                         .p = fc_list } }
                };
-               hw->mac.fc = FlowControl;
-               pch_gbe_validate_option((int *)(&hw->mac.fc),
-                                               &opt, adapter);
+               int tmp = FlowControl;
+
+               pch_gbe_validate_option(&tmp, &opt, adapter);
+               hw->mac.fc = tmp;
        }
 
        pch_gbe_check_copper_options(adapter);
index b97132d..8f29feb 100644 (file)
@@ -4,6 +4,7 @@
 
 config NET_PACKET_ENGINE
        bool "Packet Engine devices"
+       default y
        depends on PCI
        ---help---
          If you have a network (Ethernet) card belonging to this class, say Y
index 7931531..e61560e 100644 (file)
@@ -3017,7 +3017,6 @@ static int ql_adapter_initialize(struct ql3_adapter *qdev)
                (void __iomem *)port_regs;
        u32 delay = 10;
        int status = 0;
-       unsigned long hw_flags = 0;
 
        if (ql_mii_setup(qdev))
                return -1;
@@ -3228,9 +3227,9 @@ static int ql_adapter_initialize(struct ql3_adapter *qdev)
                value = ql_read_page0_reg(qdev, &port_regs->portStatus);
                if (value & PORT_STATUS_IC)
                        break;
-               spin_unlock_irqrestore(&qdev->hw_lock, hw_flags);
+               spin_unlock_irq(&qdev->hw_lock);
                msleep(500);
-               spin_lock_irqsave(&qdev->hw_lock, hw_flags);
+               spin_lock_irq(&qdev->hw_lock);
        } while (--delay);
 
        if (delay == 0) {
index 7a0c800..bbacb37 100644 (file)
@@ -3781,12 +3781,20 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
 
 static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
 {
+       void __iomem *ioaddr = tp->mmio_addr;
+
+       RTL_W8(Cfg9346, Cfg9346_Unlock);
        rtl_generic_op(tp, tp->jumbo_ops.enable);
+       RTL_W8(Cfg9346, Cfg9346_Lock);
 }
 
 static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
 {
+       void __iomem *ioaddr = tp->mmio_addr;
+
+       RTL_W8(Cfg9346, Cfg9346_Unlock);
        rtl_generic_op(tp, tp->jumbo_ops.disable);
+       RTL_W8(Cfg9346, Cfg9346_Lock);
 }
 
 static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
@@ -6186,6 +6194,9 @@ static void rtl_shutdown(struct pci_dev *pdev)
 {
        struct net_device *dev = pci_get_drvdata(pdev);
        struct rtl8169_private *tp = netdev_priv(dev);
+       struct device *d = &pdev->dev;
+
+       pm_runtime_get_sync(d);
 
        rtl8169_net_suspend(dev);
 
@@ -6207,6 +6218,8 @@ static void rtl_shutdown(struct pci_dev *pdev)
                pci_wake_from_d3(pdev, true);
                pci_set_power_state(pdev, PCI_D3hot);
        }
+
+       pm_runtime_put_noidle(d);
 }
 
 static struct pci_driver rtl8169_pci_driver = {
index 466c58a..610860f 100644 (file)
@@ -166,7 +166,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 
                dev_kfree_skb(skb);
                net->stats.tx_dropped++;
-               return NETDEV_TX_BUSY;
+               return NETDEV_TX_OK;
        }
 
        packet->extension = (void *)(unsigned long)packet +
@@ -226,7 +226,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
                dev_kfree_skb_any(skb);
        }
 
-       return ret ? NETDEV_TX_BUSY : NETDEV_TX_OK;
+       return NETDEV_TX_OK;
 }
 
 /*
@@ -313,7 +313,7 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 static void netvsc_get_drvinfo(struct net_device *net,
                               struct ethtool_drvinfo *info)
 {
-       strcpy(info->driver, "hv_netvsc");
+       strcpy(info->driver, KBUILD_MODNAME);
        strcpy(info->version, HV_DRV_VERSION);
        strcpy(info->fw_version, "N/A");
 }
@@ -485,7 +485,7 @@ MODULE_DEVICE_TABLE(vmbus, id_table);
 
 /* The one and only one */
 static struct  hv_driver netvsc_drv = {
-       .name = "netvsc",
+       .name = KBUILD_MODNAME,
        .id_table = id_table,
        .probe = netvsc_probe,
        .remove = netvsc_remove,
index 93c5d72..2d7601d 100644 (file)
@@ -359,7 +359,7 @@ static void tun_free_netdev(struct net_device *dev)
 {
        struct tun_struct *tun = netdev_priv(dev);
 
-       sock_put(tun->socket.sk);
+       sk_release_kernel(tun->socket.sk);
 }
 
 /* Net device open. */
@@ -980,10 +980,18 @@ static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
        return ret;
 }
 
+static int tun_release(struct socket *sock)
+{
+       if (sock->sk)
+               sock_put(sock->sk);
+       return 0;
+}
+
 /* Ops structure to mimic raw sockets with tun */
 static const struct proto_ops tun_socket_ops = {
        .sendmsg = tun_sendmsg,
        .recvmsg = tun_recvmsg,
+       .release = tun_release,
 };
 
 static struct proto tun_proto = {
@@ -1110,10 +1118,11 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
 
                err = -ENOMEM;
-               sk = sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
+               sk = sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
                if (!sk)
                        goto err_free_dev;
 
+               sk_change_net(sk, net);
                tun->socket.wq = &tun->wq;
                init_waitqueue_head(&tun->wq.wait);
                tun->socket.ops = &tun_socket_ops;
@@ -1174,7 +1183,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
        return 0;
 
  err_free_sk:
-       sock_put(sk);
+       tun_free_netdev(dev);
  err_free_dev:
        free_netdev(dev);
  failed:
index 8e84f5b..d6da5ee 100644 (file)
@@ -1599,6 +1599,10 @@ static const struct usb_device_id        products [] = {
        USB_DEVICE (0x6189, 0x182d),
        .driver_info =  (unsigned long) &ax8817x_info,
 }, {
+       // Sitecom LN-031 "USB 2.0 10/100/1000 Ethernet adapter"
+       USB_DEVICE (0x0df6, 0x0056),
+       .driver_info =  (unsigned long) &ax88178_info,
+}, {
        // corega FEther USB2-TX
        USB_DEVICE (0x07aa, 0x0017),
        .driver_info =  (unsigned long) &ax8817x_info,
index fae0fbd..59681f0 100644 (file)
@@ -328,13 +328,13 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
        unsigned long           lockflags;
        size_t                  size = dev->rx_urb_size;
 
-       if ((skb = alloc_skb (size + NET_IP_ALIGN, flags)) == NULL) {
+       skb = __netdev_alloc_skb_ip_align(dev->net, size, flags);
+       if (!skb) {
                netif_dbg(dev, rx_err, dev->net, "no rx skb\n");
                usbnet_defer_kevent (dev, EVENT_RX_MEMORY);
                usb_free_urb (urb);
                return -ENOMEM;
        }
-       skb_reserve (skb, NET_IP_ALIGN);
 
        entry = (struct skb_data *) skb->cb;
        entry->urb = urb;
@@ -589,6 +589,7 @@ static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q)
                entry = (struct skb_data *) skb->cb;
                urb = entry->urb;
 
+               spin_unlock_irqrestore(&q->lock, flags);
                // during some PM-driven resume scenarios,
                // these (async) unlinks complete immediately
                retval = usb_unlink_urb (urb);
@@ -596,6 +597,7 @@ static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q)
                        netdev_dbg(dev->net, "unlink urb err, %d\n", retval);
                else
                        count++;
+               spin_lock_irqsave(&q->lock, flags);
        }
        spin_unlock_irqrestore (&q->lock, flags);
        return count;
index 3dcd385..756c0f5 100644 (file)
@@ -830,13 +830,8 @@ vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
                                        ctx->l4_hdr_size = ((struct tcphdr *)
                                           skb_transport_header(skb))->doff * 4;
                                else if (iph->protocol == IPPROTO_UDP)
-                                       /*
-                                        * Use tcp header size so that bytes to
-                                        * be copied are more than required by
-                                        * the device.
-                                        */
                                        ctx->l4_hdr_size =
-                                                       sizeof(struct tcphdr);
+                                                       sizeof(struct udphdr);
                                else
                                        ctx->l4_hdr_size = 0;
                        } else {
index ed54797..fc46a81 100644 (file)
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.1.18.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.1.29.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01011200
+#define VMXNET3_DRIVER_VERSION_NUM      0x01011D00
 
 #if defined(CONFIG_PCI_MSI)
        /* RSS only makes sense if MSI-X is supported. */
index 64a1106..63e4b70 100644 (file)
@@ -367,38 +367,28 @@ netdev_tx_t i2400m_hard_start_xmit(struct sk_buff *skb,
 {
        struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
        struct device *dev = i2400m_dev(i2400m);
-       int result;
+       int result = -1;
 
        d_fnstart(3, dev, "(skb %p net_dev %p)\n", skb, net_dev);
-       if (skb_header_cloned(skb)) {
-               /*
-                * Make tcpdump/wireshark happy -- if they are
-                * running, the skb is cloned and we will overwrite
-                * the mac fields in i2400m_tx_prep_header. Expand
-                * seems to fix this...
-                */
-               result = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-               if (result) {
-                       result = NETDEV_TX_BUSY;
-                       goto error_expand;
-               }
-       }
+
+       if (skb_header_cloned(skb) && 
+           pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+               goto drop;
 
        if (i2400m->state == I2400M_SS_IDLE)
                result = i2400m_net_wake_tx(i2400m, net_dev, skb);
        else
                result = i2400m_net_tx(i2400m, net_dev, skb);
-       if (result <  0)
+       if (result <  0) {
+drop:
                net_dev->stats.tx_dropped++;
-       else {
+       } else {
                net_dev->stats.tx_packets++;
                net_dev->stats.tx_bytes += skb->len;
        }
-       result = NETDEV_TX_OK;
-error_expand:
-       kfree_skb(skb);
+       dev_kfree_skb(skb);
        d_fnend(3, dev, "(skb %p net_dev %p) = %d\n", skb, net_dev, result);
-       return result;
+       return NETDEV_TX_OK;
 }
 
 
index f901a17..86a891f 100644 (file)
@@ -489,8 +489,6 @@ static int ar5008_hw_rf_alloc_ext_banks(struct ath_hw *ah)
        ATH_ALLOC_BANK(ah->analogBank6Data, ah->iniBank6.ia_rows);
        ATH_ALLOC_BANK(ah->analogBank6TPCData, ah->iniBank6TPC.ia_rows);
        ATH_ALLOC_BANK(ah->analogBank7Data, ah->iniBank7.ia_rows);
-       ATH_ALLOC_BANK(ah->addac5416_21,
-                      ah->iniAddac.ia_rows * ah->iniAddac.ia_columns);
        ATH_ALLOC_BANK(ah->bank6Temp, ah->iniBank6.ia_rows);
 
        return 0;
@@ -519,7 +517,6 @@ static void ar5008_hw_rf_free_ext_banks(struct ath_hw *ah)
        ATH_FREE_BANK(ah->analogBank6Data);
        ATH_FREE_BANK(ah->analogBank6TPCData);
        ATH_FREE_BANK(ah->analogBank7Data);
-       ATH_FREE_BANK(ah->addac5416_21);
        ATH_FREE_BANK(ah->bank6Temp);
 
 #undef ATH_FREE_BANK
@@ -805,27 +802,7 @@ static int ar5008_hw_process_ini(struct ath_hw *ah,
        if (ah->eep_ops->set_addac)
                ah->eep_ops->set_addac(ah, chan);
 
-       if (AR_SREV_5416_22_OR_LATER(ah)) {
-               REG_WRITE_ARRAY(&ah->iniAddac, 1, regWrites);
-       } else {
-               struct ar5416IniArray temp;
-               u32 addacSize =
-                       sizeof(u32) * ah->iniAddac.ia_rows *
-                       ah->iniAddac.ia_columns;
-
-               /* For AR5416 2.0/2.1 */
-               memcpy(ah->addac5416_21,
-                      ah->iniAddac.ia_array, addacSize);
-
-               /* override CLKDRV value at [row, column] = [31, 1] */
-               (ah->addac5416_21)[31 * ah->iniAddac.ia_columns + 1] = 0;
-
-               temp.ia_array = ah->addac5416_21;
-               temp.ia_columns = ah->iniAddac.ia_columns;
-               temp.ia_rows = ah->iniAddac.ia_rows;
-               REG_WRITE_ARRAY(&temp, 1, regWrites);
-       }
-
+       REG_WRITE_ARRAY(&ah->iniAddac, 1, regWrites);
        REG_WRITE(ah, AR_PHY_ADC_SERIAL_CTL, AR_PHY_SEL_INTERNAL_ADDAC);
 
        ENABLE_REGWRITE_BUFFER(ah);
index 11f192a..d190411 100644 (file)
@@ -180,6 +180,25 @@ static void ar9002_hw_init_mode_regs(struct ath_hw *ah)
                INIT_INI_ARRAY(&ah->iniAddac, ar5416Addac,
                               ARRAY_SIZE(ar5416Addac), 2);
        }
+
+       /* iniAddac needs to be modified for these chips */
+       if (AR_SREV_9160(ah) || !AR_SREV_5416_22_OR_LATER(ah)) {
+               struct ar5416IniArray *addac = &ah->iniAddac;
+               u32 size = sizeof(u32) * addac->ia_rows * addac->ia_columns;
+               u32 *data;
+
+               data = kmalloc(size, GFP_KERNEL);
+               if (!data)
+                       return;
+
+               memcpy(data, addac->ia_array, size);
+               addac->ia_array = data;
+
+               if (!AR_SREV_5416_22_OR_LATER(ah)) {
+                       /* override CLKDRV value */
+                       INI_RA(addac, 31,1) = 0;
+               }
+       }
 }
 
 /* Support for Japan ch.14 (2484) spread */
index 6a29004..c8261d4 100644 (file)
@@ -940,7 +940,6 @@ struct ath_hw {
        u32 *analogBank6Data;
        u32 *analogBank6TPCData;
        u32 *analogBank7Data;
-       u32 *addac5416_21;
        u32 *bank6Temp;
 
        u8 txpower_limit;
index d19a9ee..bbc813d 100644 (file)
@@ -1234,6 +1234,7 @@ static bool carl9170_tx_ps_drop(struct ar9170 *ar, struct sk_buff *skb)
 {
        struct ieee80211_sta *sta;
        struct carl9170_sta_info *sta_info;
+       struct ieee80211_tx_info *tx_info;
 
        rcu_read_lock();
        sta = __carl9170_get_tx_sta(ar, skb);
@@ -1241,16 +1242,18 @@ static bool carl9170_tx_ps_drop(struct ar9170 *ar, struct sk_buff *skb)
                goto out_rcu;
 
        sta_info = (void *) sta->drv_priv;
-       if (unlikely(sta_info->sleeping)) {
-               struct ieee80211_tx_info *tx_info;
+       tx_info = IEEE80211_SKB_CB(skb);
 
+       if (unlikely(sta_info->sleeping) &&
+           !(tx_info->flags & (IEEE80211_TX_CTL_POLL_RESPONSE |
+                               IEEE80211_TX_CTL_CLEAR_PS_FILT))) {
                rcu_read_unlock();
 
-               tx_info = IEEE80211_SKB_CB(skb);
                if (tx_info->flags & IEEE80211_TX_CTL_AMPDU)
                        atomic_dec(&ar->tx_ampdu_upload);
 
                tx_info->flags |= IEEE80211_TX_STAT_TX_FILTERED;
+               carl9170_release_dev_space(ar, skb);
                carl9170_tx_status(ar, skb, false);
                return true;
        }
index 90911ee..30b5887 100644 (file)
@@ -1051,17 +1051,13 @@ brcms_c_ampdu_dotxstatus_complete(struct ampdu_info *ampdu, struct scb *scb,
                }
                /* either retransmit or send bar if ack not recd */
                if (!ack_recd) {
-                       struct ieee80211_tx_rate *txrate =
-                           tx_info->status.rates;
-                       if (retry && (txrate[0].count < (int)retry_limit)) {
+                       if (retry && (ini->txretry[index] < (int)retry_limit)) {
                                ini->txretry[index]++;
                                ini->tx_in_transit--;
                                /*
                                 * Use high prededence for retransmit to
                                 * give some punch
                                 */
-                               /* brcms_c_txq_enq(wlc, scb, p,
-                                * BRCMS_PRIO_TO_PREC(tid)); */
                                brcms_c_txq_enq(wlc, scb, p,
                                                BRCMS_PRIO_TO_HI_PREC(tid));
                        } else {
@@ -1074,9 +1070,9 @@ brcms_c_ampdu_dotxstatus_complete(struct ampdu_info *ampdu, struct scb *scb,
                                    IEEE80211_TX_STAT_AMPDU_NO_BACK;
                                skb_pull(p, D11_PHY_HDR_LEN);
                                skb_pull(p, D11_TXH_LEN);
-                               wiphy_err(wiphy, "%s: BA Timeout, seq %d, in_"
-                                       "transit %d\n", "AMPDU status", seq,
-                                       ini->tx_in_transit);
+                               BCMMSG(wiphy,
+                                      "BA Timeout, seq %d, in_transit %d\n",
+                                      seq, ini->tx_in_transit);
                                ieee80211_tx_status_irqsafe(wlc->pub->ieee_hw,
                                                            p);
                        }
index 54b2d39..a7dfba8 100644 (file)
@@ -2475,7 +2475,7 @@ il3945_bg_alive_start(struct work_struct *data)
            container_of(data, struct il_priv, alive_start.work);
 
        mutex_lock(&il->mutex);
-       if (test_bit(S_EXIT_PENDING, &il->status))
+       if (test_bit(S_EXIT_PENDING, &il->status) || il->txq == NULL)
                goto out;
 
        il3945_alive_start(il);
index 1489b15..c80eb9b 100644 (file)
@@ -1870,11 +1870,12 @@ il3945_bg_reg_txpower_periodic(struct work_struct *work)
        struct il_priv *il = container_of(work, struct il_priv,
                                          _3945.thermal_periodic.work);
 
-       if (test_bit(S_EXIT_PENDING, &il->status))
-               return;
-
        mutex_lock(&il->mutex);
+       if (test_bit(S_EXIT_PENDING, &il->status) || il->txq == NULL)
+               goto out;
+
        il3945_reg_txpower_periodic(il);
+out:
        mutex_unlock(&il->mutex);
 }
 
index 64cf439..ca78e91 100644 (file)
@@ -1240,7 +1240,7 @@ int iwlagn_suspend(struct iwl_priv *priv,
                                .flags = CMD_SYNC,
                                .data[0] = key_data.rsc_tsc,
                                .dataflags[0] = IWL_HCMD_DFL_NOCOPY,
-                               .len[0] = sizeof(key_data.rsc_tsc),
+                               .len[0] = sizeof(*key_data.rsc_tsc),
                        };
 
                        ret = iwl_trans_send_cmd(trans(priv), &rsc_tsc_cmd);
index 7353826..e483cfa 100644 (file)
@@ -1187,6 +1187,7 @@ int iwl_remove_dynamic_key(struct iwl_priv *priv,
        unsigned long flags;
        struct iwl_addsta_cmd sta_cmd;
        u8 sta_id = iwlagn_key_sta_id(priv, ctx->vif, sta);
+       __le16 key_flags;
 
        /* if station isn't there, neither is the key */
        if (sta_id == IWL_INVALID_STATION)
@@ -1212,7 +1213,14 @@ int iwl_remove_dynamic_key(struct iwl_priv *priv,
                IWL_ERR(priv, "offset %d not used in uCode key table.\n",
                        keyconf->hw_key_idx);
 
-       sta_cmd.key.key_flags = STA_KEY_FLG_NO_ENC | STA_KEY_FLG_INVALID;
+       key_flags = cpu_to_le16(keyconf->keyidx << STA_KEY_FLG_KEYID_POS);
+       key_flags |= STA_KEY_FLG_MAP_KEY_MSK | STA_KEY_FLG_NO_ENC |
+                    STA_KEY_FLG_INVALID;
+
+       if (!(keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE))
+               key_flags |= STA_KEY_MULTICAST_MSK;
+
+       sta_cmd.key.key_flags = key_flags;
        sta_cmd.key.key_offset = WEP_INVALID_OFFSET;
        sta_cmd.sta.modify_mask = STA_MODIFY_KEY_MASK;
        sta_cmd.mode = STA_CONTROL_MODIFY_MSK;
index 5b2972b..2210a0f 100644 (file)
@@ -846,6 +846,7 @@ mwifiex_cfg80211_assoc(struct mwifiex_private *priv, size_t ssid_len, u8 *ssid,
        priv->sec_info.wpa_enabled = false;
        priv->sec_info.wpa2_enabled = false;
        priv->wep_key_curr_index = 0;
+       priv->sec_info.encryption_mode = 0;
        ret = mwifiex_set_encode(priv, NULL, 0, 0, 1);
 
        if (mode == NL80211_IFTYPE_ADHOC) {
index c3e1aa7..fd356b7 100644 (file)
@@ -426,10 +426,14 @@ void rt2x00lib_txdone(struct queue_entry *entry,
        /*
         * If the data queue was below the threshold before the txdone
         * handler we must make sure the packet queue in the mac80211 stack
-        * is reenabled when the txdone handler has finished.
+        * is reenabled when the txdone handler has finished. This has to be
+        * serialized with rt2x00mac_tx(); otherwise we could wake up the queue
+        * before it has been stopped.
         */
+       spin_lock_bh(&entry->queue->tx_lock);
        if (!rt2x00queue_threshold(entry->queue))
                rt2x00queue_unpause_queue(entry->queue);
+       spin_unlock_bh(&entry->queue->tx_lock);
 }
 EXPORT_SYMBOL_GPL(rt2x00lib_txdone);
 
@@ -1220,7 +1224,8 @@ void rt2x00lib_remove_dev(struct rt2x00_dev *rt2x00dev)
                cancel_work_sync(&rt2x00dev->rxdone_work);
                cancel_work_sync(&rt2x00dev->txdone_work);
        }
-       destroy_workqueue(rt2x00dev->workqueue);
+       if (rt2x00dev->workqueue)
+               destroy_workqueue(rt2x00dev->workqueue);
 
        /*
         * Free the tx status fifo.
index ede3c58..2df2eb6 100644 (file)
@@ -152,13 +152,22 @@ void rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
        if (unlikely(rt2x00queue_write_tx_frame(queue, skb, false)))
                goto exit_fail;
 
+       /*
+        * Pausing the queue has to be serialized with rt2x00lib_txdone().
+        * Note: do not use the spin_lock_bh variant here, since bottom halves
+        * are already disabled before the ieee80211_xmit() call.
+        */
+       spin_lock(&queue->tx_lock);
        if (rt2x00queue_threshold(queue))
                rt2x00queue_pause_queue(queue);
+       spin_unlock(&queue->tx_lock);
 
        return;
 
  exit_fail:
+       spin_lock(&queue->tx_lock);
        rt2x00queue_pause_queue(queue);
+       spin_unlock(&queue->tx_lock);
  exit_free_skb:
        ieee80211_free_txskb(hw, skb);
 }
index 5adfb3e..9b1b2b7 100644 (file)
@@ -619,6 +619,9 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb,
        else if (test_bit(REQUIRE_DMA, &queue->rt2x00dev->cap_flags))
                rt2x00queue_align_frame(skb);
 
+       /*
+        * This function must be called with bottom halves (bh) disabled.
+        */
        spin_lock(&queue->tx_lock);
 
        if (unlikely(rt2x00queue_full(queue))) {
index ea2bd1b..91a375f 100644 (file)
@@ -23,7 +23,6 @@
 #include <asm/machdep.h>
 #endif /* CONFIG_PPC */
 
-#include <asm/setup.h>
 #include <asm/page.h>
 
 char *of_fdt_get_string(struct boot_param_header *blob, u32 offset)
index 980c079..483c0ad 100644 (file)
@@ -182,7 +182,7 @@ struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
        if (!phy_id || sz < sizeof(*phy_id))
                return NULL;
 
-       sprintf(bus_id, PHY_ID_FMT, "0", be32_to_cpu(phy_id[0]));
+       sprintf(bus_id, PHY_ID_FMT, "fixed-0", be32_to_cpu(phy_id[0]));
 
        phy = phy_connect(dev, bus_id, hndlr, 0, iface);
        return IS_ERR(phy) ? NULL : phy;
index 1cfbf22..24f049e 100644 (file)
@@ -500,6 +500,9 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
        int pos;
        u32 reg32;
 
+       if (aspm_disabled)
+               return 0;
+
        /*
         * Some functions in a slot might not all be PCIe functions,
         * very strange. Disable ASPM for the whole slot
index f995e6e..15dbd8c 100644 (file)
@@ -143,6 +143,30 @@ config FUJITSU_LAPTOP_DEBUG
 
          If you are not sure, say N here.
 
+config FUJITSU_TABLET
+       tristate "Fujitsu Tablet Extras"
+       depends on ACPI
+       depends on INPUT
+       ---help---
+         This is a driver for tablets built by Fujitsu:
+
+           * Lifebook P1510/P1610/P1620/Txxxx
+           * Stylistic ST5xxx
+           * Possibly other Fujitsu tablet models
+
+         It adds support for the panel buttons, docking station detection,
+         tablet/notebook mode detection for convertibles, and
+         orientation detection for docked slates.
+
+         If you have a Fujitsu convertible or slate, say Y or M here.
+
+config AMILO_RFKILL
+       tristate "Fujitsu-Siemens Amilo rfkill support"
+       depends on RFKILL
+       ---help---
+         This is a driver for enabling wifi on some Fujitsu-Siemens Amilo
+         laptops.
+
 config TC1100_WMI
        tristate "HP Compaq TC1100 Tablet WMI Extras (EXPERIMENTAL)"
        depends on !X86_64
index 293a320..d328f21 100644 (file)
@@ -17,12 +17,14 @@ obj-$(CONFIG_ACER_WMI)              += acer-wmi.o
 obj-$(CONFIG_ACERHDF)          += acerhdf.o
 obj-$(CONFIG_HP_ACCEL)         += hp_accel.o
 obj-$(CONFIG_HP_WMI)           += hp-wmi.o
+obj-$(CONFIG_AMILO_RFKILL)     += amilo-rfkill.o
 obj-$(CONFIG_TC1100_WMI)       += tc1100-wmi.o
 obj-$(CONFIG_SONY_LAPTOP)      += sony-laptop.o
 obj-$(CONFIG_IDEAPAD_LAPTOP)   += ideapad-laptop.o
 obj-$(CONFIG_THINKPAD_ACPI)    += thinkpad_acpi.o
 obj-$(CONFIG_SENSORS_HDAPS)    += hdaps.o
 obj-$(CONFIG_FUJITSU_LAPTOP)   += fujitsu-laptop.o
+obj-$(CONFIG_FUJITSU_TABLET)   += fujitsu-tablet.o
 obj-$(CONFIG_PANASONIC_LAPTOP) += panasonic-laptop.o
 obj-$(CONFIG_INTEL_MENLOW)     += intel_menlow.o
 obj-$(CONFIG_ACPI_WMI)         += wmi.o
index b848277..1e5290b 100644 (file)
@@ -679,6 +679,32 @@ static acpi_status AMW0_find_mailled(void)
        return AE_OK;
 }
 
+/* Set to 1 by the callback below; never cleared once set. */
+static int AMW0_set_cap_acpi_check_device_found;
+
+/*
+ * Callback handed to acpi_get_devices() by AMW0_set_cap_acpi_check_device().
+ * It ignores all of its arguments, records that it was invoked and always
+ * returns AE_OK.
+ */
+static acpi_status AMW0_set_cap_acpi_check_device_cb(acpi_handle handle,
+       u32 level, void *context, void **retval)
+{
+       AMW0_set_cap_acpi_check_device_found = 1;
+       return AE_OK;
+}
+
+/*
+ * ACPI IDs looked up by AMW0_set_cap_acpi_check_device(). If any of them is
+ * found while quirks is still &quirk_unknown, AMW0_set_capabilities() does
+ * not set ACER_CAP_WIRELESS. The table ends with an empty ID string.
+ */
+static const struct acpi_device_id norfkill_ids[] = {
+       { "VPC2004", 0},
+       { "IBM0068", 0},
+       { "LEN0068", 0},
+       { "", 0},
+};
+
+/*
+ * Ask ACPI about every ID listed in norfkill_ids.
+ *
+ * Returns the file-scope "found" flag: non-zero once the callback has been
+ * invoked for any of the IDs, zero otherwise.
+ */
+static int AMW0_set_cap_acpi_check_device(void)
+{
+       const struct acpi_device_id *entry = norfkill_ids;
+
+       /* The table is terminated by an entry whose ID string is empty. */
+       while (entry->id[0]) {
+               acpi_get_devices(entry->id, AMW0_set_cap_acpi_check_device_cb,
+                               NULL, NULL);
+               entry++;
+       }
+
+       return AMW0_set_cap_acpi_check_device_found;
+}
+
 static acpi_status AMW0_set_capabilities(void)
 {
        struct wmab_args args;
@@ -692,7 +718,9 @@ static acpi_status AMW0_set_capabilities(void)
         * work.
         */
        if (wmi_has_guid(AMW0_GUID2)) {
-               interface->capability |= ACER_CAP_WIRELESS;
+               if ((quirks != &quirk_unknown) ||
+                   !AMW0_set_cap_acpi_check_device())
+                       interface->capability |= ACER_CAP_WIRELESS;
                return AE_OK;
        }
 
diff --git a/drivers/platform/x86/amilo-rfkill.c b/drivers/platform/x86/amilo-rfkill.c
new file mode 100644 (file)
index 0000000..19170bb
--- /dev/null
@@ -0,0 +1,173 @@
+/*
+ * Support for rfkill on some Fujitsu-Siemens Amilo laptops.
+ * Copyright 2011 Ben Hutchings.
+ *
+ * Based in part on the fsam7440 driver, which is:
+ * Copyright 2005 Alejandro Vidal Mata & Javier Vidal Mata.
+ * and on the fsaa1655g driver, which is:
+ * Copyright 2006 Martin Večeřa.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/i8042.h>
+#include <linux/io.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/rfkill.h>
+
+/*
+ * These values were obtained from disassembling and debugging the
+ * PM.exe program installed in the Fujitsu-Siemens AMILO A1655G
+ */
+#define A1655_WIFI_COMMAND     0x10C5
+#define A1655_WIFI_ON          0x25
+#define A1655_WIFI_OFF         0x45
+
+/*
+ * rfkill set_block handler for the AMILO A1655.
+ *
+ * Sends A1655_WIFI_OFF (blocked) or A1655_WIFI_ON (unblocked) as the
+ * parameter of A1655_WIFI_COMMAND to the i8042 controller, holding the
+ * i8042 chip lock around the command. @data is unused.
+ *
+ * Returns whatever i8042_command() returned.
+ */
+static int amilo_a1655_rfkill_set_block(void *data, bool blocked)
+{
+       u8 param;
+       int rc;
+
+       if (blocked)
+               param = A1655_WIFI_OFF;
+       else
+               param = A1655_WIFI_ON;
+
+       i8042_lock_chip();
+       rc = i8042_command(&param, A1655_WIFI_COMMAND);
+       i8042_unlock_chip();
+
+       return rc;
+}
+
+/* rfkill operations selected for the AMILO A1655 via the DMI table. */
+static const struct rfkill_ops amilo_a1655_rfkill_ops = {
+       .set_block = amilo_a1655_rfkill_set_block,
+};
+
+/*
+ * These values were obtained from disassembling the PM.exe program
+ * installed in the Fujitsu-Siemens AMILO M 7440
+ */
+#define M7440_PORT1            0x118f
+#define M7440_PORT2            0x118e
+#define M7440_RADIO_ON1                0x12
+#define M7440_RADIO_ON2                0x80
+#define M7440_RADIO_OFF1       0x10
+#define M7440_RADIO_OFF2       0x00
+
+/*
+ * rfkill set_block handler for the AMILO M7440.
+ *
+ * Writes the on/off value pair to the two I/O ports and reads them back.
+ * @data is unused.
+ *
+ * Returns 0 if both ports read back the value written, -EIO otherwise.
+ */
+static int amilo_m7440_rfkill_set_block(void *data, bool blocked)
+{
+       u8 val1, val2;
+
+       if (blocked) {
+               val1 = M7440_RADIO_OFF1;
+               val2 = M7440_RADIO_OFF2;
+       } else {
+               val1 = M7440_RADIO_ON1;
+               val2 = M7440_RADIO_ON2;
+       }
+
+       outb(val1, M7440_PORT1);
+       outb(val2, M7440_PORT2);
+
+       /* Check whether the state has changed correctly */
+       if (inb(M7440_PORT1) != val1)
+               return -EIO;
+       if (inb(M7440_PORT2) != val2)
+               return -EIO;
+
+       return 0;
+}
+
+/* rfkill operations selected for the AMILO M7440 via the DMI table. */
+static const struct rfkill_ops amilo_m7440_rfkill_ops = {
+       .set_block = amilo_m7440_rfkill_set_block,
+};
+
+/*
+ * Supported machines. Each entry matches on DMI system vendor and board
+ * name, and carries in driver_data the rfkill_ops that amilo_rfkill_probe()
+ * passes to rfkill_alloc(). The empty entry terminates the table.
+ */
+static const struct dmi_system_id __devinitdata amilo_rfkill_id_table[] = {
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+                       DMI_MATCH(DMI_BOARD_NAME, "AMILO A1655"),
+               },
+               .driver_data = (void *)&amilo_a1655_rfkill_ops
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+                       DMI_MATCH(DMI_BOARD_NAME, "AMILO M7440"),
+               },
+               .driver_data = (void *)&amilo_m7440_rfkill_ops
+       },
+       {}
+};
+
+/* Platform device registered in amilo_rfkill_init(). */
+static struct platform_device *amilo_rfkill_pdev;
+/* rfkill device allocated and registered in amilo_rfkill_probe(). */
+static struct rfkill *amilo_rfkill_dev;
+
+/*
+ * Platform driver probe: allocate and register the WLAN rfkill device.
+ *
+ * The rfkill_ops are taken from the driver_data of the first matching
+ * entry in amilo_rfkill_id_table.
+ *
+ * Returns 0 on success, -ENXIO if no DMI entry matches, -ENOMEM if
+ * rfkill_alloc() fails, or the error returned by rfkill_register().
+ */
+static int __devinit amilo_rfkill_probe(struct platform_device *device)
+{
+       const struct dmi_system_id *system_id =
+               dmi_first_match(amilo_rfkill_id_table);
+       int rc;
+
+       /*
+        * amilo_rfkill_init() only registers the device on a matching
+        * machine, but do not dereference a NULL match if the driver is
+        * ever bound some other way.
+        */
+       if (!system_id)
+               return -ENXIO;
+
+       amilo_rfkill_dev = rfkill_alloc(KBUILD_MODNAME, &device->dev,
+                                       RFKILL_TYPE_WLAN,
+                                       system_id->driver_data, NULL);
+       if (!amilo_rfkill_dev)
+               return -ENOMEM;
+
+       rc = rfkill_register(amilo_rfkill_dev);
+       if (rc)
+               goto fail;
+
+       return 0;
+
+fail:
+       rfkill_destroy(amilo_rfkill_dev);
+       return rc;
+}
+
+/*
+ * Platform driver remove: unregister and then destroy the rfkill device
+ * created by amilo_rfkill_probe(). Always returns 0.
+ */
+static int amilo_rfkill_remove(struct platform_device *device)
+{
+       rfkill_unregister(amilo_rfkill_dev);
+       rfkill_destroy(amilo_rfkill_dev);
+       return 0;
+}
+
+/*
+ * Platform driver; its name (KBUILD_MODNAME) is the same name used for the
+ * platform device registered in amilo_rfkill_init().
+ */
+static struct platform_driver amilo_rfkill_driver = {
+       .driver = {
+               .name   = KBUILD_MODNAME,
+               .owner  = THIS_MODULE,
+       },
+       .probe  = amilo_rfkill_probe,
+       .remove = amilo_rfkill_remove,
+};
+
+/*
+ * Module init: on a machine listed in amilo_rfkill_id_table, register the
+ * platform driver and then a platform device of the same name.
+ *
+ * Returns 0 on success, -ENODEV when the machine is not in the table, or
+ * the registration error. The driver is unregistered again if the device
+ * cannot be registered.
+ */
+static int __init amilo_rfkill_init(void)
+{
+       int rc;
+
+       if (!dmi_first_match(amilo_rfkill_id_table))
+               return -ENODEV;
+
+       rc = platform_driver_register(&amilo_rfkill_driver);
+       if (rc)
+               return rc;
+
+       amilo_rfkill_pdev = platform_device_register_simple(KBUILD_MODNAME, -1,
+                                                           NULL, 0);
+       if (!IS_ERR(amilo_rfkill_pdev))
+               return 0;
+
+       /* Device registration failed: undo the driver registration. */
+       rc = PTR_ERR(amilo_rfkill_pdev);
+       platform_driver_unregister(&amilo_rfkill_driver);
+       return rc;
+}
+
+/* Module exit: unregister the platform device, then the platform driver. */
+static void __exit amilo_rfkill_exit(void)
+{
+       platform_device_unregister(amilo_rfkill_pdev);
+       platform_driver_unregister(&amilo_rfkill_driver);
+}
+
+MODULE_AUTHOR("Ben Hutchings <ben@decadent.org.uk>");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(dmi, amilo_rfkill_id_table);
+
+module_init(amilo_rfkill_init);
+module_exit(amilo_rfkill_exit);
diff --git a/drivers/platform/x86/fujitsu-tablet.c b/drivers/platform/x86/fujitsu-tablet.c
new file mode 100644 (file)
index 0000000..580d80a
--- /dev/null
@@ -0,0 +1,478 @@
+/*
+ * Copyright (C) 2006-2012 Robert Gerlach <khnz@gmx.de>
+ * Copyright (C) 2005-2006 Jan Rychter <jan@rychter.com>
+ *
+ * You can redistribute and/or modify this program under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+ * Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/delay.h>
+#include <linux/dmi.h>
+
+#define MODULENAME "fujitsu-tablet"
+
+#define ACPI_FUJITSU_CLASS "fujitsu"
+
+#define INVERT_TABLET_MODE_BIT      0x01
+#define FORCE_TABLET_MODE_IF_UNDOCK 0x02
+
+#define KEYMAP_LEN 16
+
+/*
+ * ACPI IDs handled by acpi_fujitsu_driver (also exported through
+ * MODULE_DEVICE_TABLE). The table ends with an empty ID.
+ */
+static const struct acpi_device_id fujitsu_ids[] = {
+       { .id = "FUJ02BD" },
+       { .id = "FUJ02BF" },
+       { .id = "" }
+};
+
+/* Per-model configuration, filled in by the DMI callbacks. */
+struct fujitsu_config {
+       unsigned short keymap[KEYMAP_LEN];      /* key code per button-mask bit */
+       unsigned int quirks;    /* INVERT_TABLET_MODE_BIT, FORCE_TABLET_MODE_IF_UNDOCK */
+};
+
+/*
+ * Key codes for the Lifebook P/T series entries of dmi_ids (also used by
+ * the default entry). Index i is the key reported when bit i of the button
+ * mask changes in fujitsu_interrupt().
+ */
+static unsigned short keymap_Lifebook_Tseries[KEYMAP_LEN] __initconst = {
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_SCROLLDOWN,
+       KEY_SCROLLUP,
+       KEY_DIRECTION,
+       KEY_LEFTCTRL,
+       KEY_BRIGHTNESSUP,
+       KEY_BRIGHTNESSDOWN,
+       KEY_BRIGHTNESS_ZERO,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_LEFTALT
+};
+
+/*
+ * Key codes for the "Fujitsu LifeBook U810" entry of dmi_ids. Index i is
+ * the key reported when bit i of the button mask changes.
+ */
+static unsigned short keymap_Lifebook_U810[KEYMAP_LEN] __initconst = {
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_PROG1,
+       KEY_PROG2,
+       KEY_DIRECTION,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_UP,
+       KEY_DOWN,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_LEFTCTRL,
+       KEY_LEFTALT
+};
+
+/*
+ * Key codes for the "Stylistic T Series" entry of dmi_ids. Index i is the
+ * key reported when bit i of the button mask changes.
+ */
+static unsigned short keymap_Stylistic_Tseries[KEYMAP_LEN] __initconst = {
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_PRINT,
+       KEY_BACKSPACE,
+       KEY_SPACE,
+       KEY_ENTER,
+       KEY_BRIGHTNESSUP,
+       KEY_BRIGHTNESSDOWN,
+       KEY_DOWN,
+       KEY_UP,
+       KEY_SCROLLUP,
+       KEY_SCROLLDOWN,
+       KEY_LEFTCTRL,
+       KEY_LEFTALT
+};
+
+/*
+ * Key codes for the two "Stylistic ST5xxx Series" entries of dmi_ids.
+ * Index i is the key reported when bit i of the button mask changes.
+ */
+static unsigned short keymap_Stylistic_ST5xxx[KEYMAP_LEN] __initconst = {
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_RESERVED,
+       KEY_MAIL,
+       KEY_DIRECTION,
+       KEY_ESC,
+       KEY_ENTER,
+       KEY_BRIGHTNESSUP,
+       KEY_BRIGHTNESSDOWN,
+       KEY_DOWN,
+       KEY_UP,
+       KEY_SCROLLUP,
+       KEY_SCROLLDOWN,
+       KEY_LEFTCTRL,
+       KEY_LEFTALT
+};
+
+/* Driver-wide state; a single file-scope instance, not per device. */
+static struct {
+       struct input_dev *idev;         /* set by input_fujitsu_setup() */
+       struct fujitsu_config config;   /* keymap and quirks from DMI callbacks */
+       unsigned long prev_keymask;     /* button mask seen by the last interrupt */
+
+       char phys[21];                  /* "<HID>/input0", built in acpi_fujitsu_add() */
+
+       int irq;                        /* filled in by fujitsu_walk_resources() */
+       int io_base;                    /* filled in by fujitsu_walk_resources() */
+       int io_length;                  /* filled in by fujitsu_walk_resources() */
+} fujitsu;
+
+/*
+ * Read and return the byte at io_base + 2. Callers discard the value;
+ * presumably the read itself acknowledges the pending event - TODO confirm.
+ */
+static u8 fujitsu_ack(void)
+{
+       return inb(fujitsu.io_base + 2);
+}
+
+/*
+ * Read and return the byte at io_base + 6. The interrupt handler tests
+ * bit 0x01 of it and fujitsu_reset() polls bit 0x02.
+ */
+static u8 fujitsu_status(void)
+{
+       return inb(fujitsu.io_base + 6);
+}
+
+/*
+ * Read one device register: write @addr to io_base, then return the byte
+ * read from io_base + 4.
+ */
+static u8 fujitsu_read_register(const u8 addr)
+{
+       outb(addr, fujitsu.io_base);
+       return inb(fujitsu.io_base + 4);
+}
+
+/*
+ * Read register 0xdd and report the dock and tablet-mode switches.
+ *
+ * Bit 0x02 is the dock state and bit 0x01 the tablet-mode state. The
+ * tablet-mode bit is inverted when INVERT_TABLET_MODE_BIT is set, and
+ * tablet mode is forced on when FORCE_TABLET_MODE_IF_UNDOCK is set and the
+ * device is not docked.
+ */
+static void fujitsu_send_state(void)
+{
+       const unsigned int quirks = fujitsu.config.quirks;
+       int state = fujitsu_read_register(0xdd);
+       int dock = state & 0x02;
+       int tablet_mode = state & 0x01;
+
+       if (quirks & INVERT_TABLET_MODE_BIT)
+               tablet_mode = !tablet_mode;
+
+       /* The undocked override wins over the (possibly inverted) bit. */
+       if ((quirks & FORCE_TABLET_MODE_IF_UNDOCK) && !dock)
+               tablet_mode = 1;
+
+       input_report_switch(fujitsu.idev, SW_DOCK, dock);
+       input_report_switch(fujitsu.idev, SW_TABLET_MODE, tablet_mode);
+       input_sync(fujitsu.idev);
+}
+
+/*
+ * Acknowledge the device, wait for status bit 0x02 to clear (sleeping
+ * 20 ms between polls, giving up once the countdown from 50 reaches zero)
+ * and then report the current switch state.
+ */
+static void fujitsu_reset(void)
+{
+       int timeout = 50;
+
+       fujitsu_ack();
+
+       for (;;) {
+               if (!(fujitsu_status() & 0x02))
+                       break;
+               if (--timeout == 0)
+                       break;
+               msleep(20);
+       }
+
+       fujitsu_send_state();
+}
+
+/*
+ * Allocate, configure and register the driver's input device.
+ *
+ * @parent: device set as the input device's parent
+ * @name:   input device name (the pointer is stored, not copied)
+ * @phys:   input device phys string (the pointer is stored, not copied)
+ *
+ * The device uses fujitsu.config.keymap as its keycode table and declares
+ * every non-zero key code in it, plus MSC_SCAN and the SW_DOCK and
+ * SW_TABLET_MODE switches.
+ *
+ * Returns 0 and stores the device in fujitsu.idev on success, -ENOMEM if
+ * allocation fails, or the error from input_register_device().
+ */
+static int __devinit input_fujitsu_setup(struct device *parent,
+                                        const char *name, const char *phys)
+{
+       struct input_dev *idev;
+       int error;
+       int i;
+
+       idev = input_allocate_device();
+       if (!idev)
+               return -ENOMEM;
+
+       idev->dev.parent = parent;
+       idev->phys = phys;
+       idev->name = name;
+       idev->id.bustype = BUS_HOST;
+       idev->id.vendor  = 0x1734;      /* Fujitsu Siemens Computer GmbH */
+       idev->id.product = 0x0001;
+       idev->id.version = 0x0101;
+
+       idev->keycode = fujitsu.config.keymap;
+       idev->keycodesize = sizeof(fujitsu.config.keymap[0]);
+       idev->keycodemax = ARRAY_SIZE(fujitsu.config.keymap);
+
+       __set_bit(EV_REP, idev->evbit);
+
+       for (i = 0; i < ARRAY_SIZE(fujitsu.config.keymap); i++)
+               if (fujitsu.config.keymap[i])
+                       input_set_capability(idev, EV_KEY, fujitsu.config.keymap[i]);
+
+       input_set_capability(idev, EV_MSC, MSC_SCAN);
+
+       /* Each switch only needs to be declared once. */
+       input_set_capability(idev, EV_SW, SW_DOCK);
+       input_set_capability(idev, EV_SW, SW_TABLET_MODE);
+
+       error = input_register_device(idev);
+       if (error) {
+               /* Not registered yet, so free rather than unregister. */
+               input_free_device(idev);
+               return error;
+       }
+
+       fujitsu.idev = idev;
+       return 0;
+}
+
+/* Unregister the input device registered by input_fujitsu_setup(). */
+static void input_fujitsu_remove(void)
+{
+       input_unregister_device(fujitsu.idev);
+}
+
+/*
+ * Interrupt handler for the panel buttons and the dock/tablet switches.
+ *
+ * Returns IRQ_NONE when status bit 0x01 is clear (the line is requested
+ * with IRQF_SHARED). Otherwise it reports the switch state, reads the
+ * 16-bit button mask from registers 0xde (low byte) and 0xdf (high byte),
+ * inverts it so that a set bit means "pressed", emits one key event per
+ * changed bit, acknowledges the device and returns IRQ_HANDLED.
+ */
+static irqreturn_t fujitsu_interrupt(int irq, void *dev_id)
+{
+       unsigned long keymask, changed;
+       int bit;
+
+       if (unlikely(!(fujitsu_status() & 0x01)))
+               return IRQ_NONE;
+
+       fujitsu_send_state();
+
+       keymask  = fujitsu_read_register(0xde);
+       keymask |= fujitsu_read_register(0xdf) << 8;
+       keymask ^= 0xffff;
+
+       changed = keymask ^ fujitsu.prev_keymask;
+       /* When nothing changed this stores the value already held. */
+       fujitsu.prev_keymask = keymask;
+
+       for_each_set_bit(bit, &changed, KEYMAP_LEN) {
+               unsigned int keycode = fujitsu.config.keymap[bit];
+               /* The bit is known to be set in changed, so test keymask only. */
+               int pressed = keymask & BIT(bit);
+
+               /* The scan code is only sent on press, not on release. */
+               if (pressed)
+                       input_event(fujitsu.idev, EV_MSC, MSC_SCAN, bit);
+
+               input_report_key(fujitsu.idev, keycode, pressed);
+               input_sync(fujitsu.idev);
+       }
+
+       fujitsu_ack();
+       return IRQ_HANDLED;
+}
+
+/*
+ * DMI callback: log the matched entry's ident and copy its keymap
+ * (driver_data) into fujitsu.config.keymap. Always returns 1.
+ */
+static int __devinit fujitsu_dmi_default(const struct dmi_system_id *dmi)
+{
+       printk(KERN_INFO MODULENAME ": %s\n", dmi->ident);
+       memcpy(fujitsu.config.keymap, dmi->driver_data,
+                       sizeof(fujitsu.config.keymap));
+       return 1;
+}
+
+/*
+ * DMI callback for Stylistic models: does everything fujitsu_dmi_default()
+ * does and additionally enables both tablet-mode quirks. Always returns 1.
+ */
+static int __devinit fujitsu_dmi_stylistic(const struct dmi_system_id *dmi)
+{
+       fujitsu_dmi_default(dmi);
+       fujitsu.config.quirks |= FORCE_TABLET_MODE_IF_UNDOCK |
+                                INVERT_TABLET_MODE_BIT;
+       return 1;
+}
+
+/*
+ * Model table passed to dmi_check_system() in fujitsu_module_init(). Each
+ * entry's callback installs the keymap from driver_data; the Stylistic
+ * entries also set the tablet-mode quirks. The last real entry has empty
+ * match strings and is labelled as the default.
+ * NOTE(review): this relies on dmi_check_system() stopping at the first
+ * entry whose callback returns non-zero - confirm against the DMI API.
+ */
+static struct dmi_system_id dmi_ids[] __initconst = {
+       {
+               .callback = fujitsu_dmi_default,
+               .ident = "Fujitsu Siemens P/T Series",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK")
+               },
+               .driver_data = keymap_Lifebook_Tseries
+       },
+       {
+               .callback = fujitsu_dmi_default,
+               .ident = "Fujitsu Lifebook T Series",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook T")
+               },
+               .driver_data = keymap_Lifebook_Tseries
+       },
+       {
+               .callback = fujitsu_dmi_stylistic,
+               .ident = "Fujitsu Siemens Stylistic T Series",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Stylistic T")
+               },
+               .driver_data = keymap_Stylistic_Tseries
+       },
+       {
+               .callback = fujitsu_dmi_default,
+               .ident = "Fujitsu LifeBook U810",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook U810")
+               },
+               .driver_data = keymap_Lifebook_U810
+       },
+       {
+               .callback = fujitsu_dmi_stylistic,
+               .ident = "Fujitsu Siemens Stylistic ST5xxx Series",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "STYLISTIC ST5")
+               },
+               .driver_data = keymap_Stylistic_ST5xxx
+       },
+       {
+               .callback = fujitsu_dmi_stylistic,
+               .ident = "Fujitsu Siemens Stylistic ST5xxx Series",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Stylistic ST5")
+               },
+               .driver_data = keymap_Stylistic_ST5xxx
+       },
+       {
+               .callback = fujitsu_dmi_default,
+               .ident = "Unknown (using defaults)",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, ""),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "")
+               },
+               .driver_data = keymap_Lifebook_Tseries
+       },
+       { NULL }
+};
+
+/*
+ * acpi_walk_resources() callback: record the IRQ number and the I/O range
+ * in the driver state. @data is unused.
+ *
+ * Returns AE_OK for IRQ and IO resources. At the end tag it returns AE_OK
+ * only if both an IRQ and an I/O base were recorded, AE_NOT_FOUND
+ * otherwise. Any other resource type yields AE_ERROR.
+ */
+static acpi_status __devinit
+fujitsu_walk_resources(struct acpi_resource *res, void *data)
+{
+       if (res->type == ACPI_RESOURCE_TYPE_IRQ) {
+               fujitsu.irq = res->data.irq.interrupts[0];
+               return AE_OK;
+       }
+
+       if (res->type == ACPI_RESOURCE_TYPE_IO) {
+               fujitsu.io_base = res->data.io.minimum;
+               fujitsu.io_length = res->data.io.address_length;
+               return AE_OK;
+       }
+
+       if (res->type == ACPI_RESOURCE_TYPE_END_TAG) {
+               if (!fujitsu.irq || !fujitsu.io_base)
+                       return AE_NOT_FOUND;
+               return AE_OK;
+       }
+
+       return AE_ERROR;
+}
+
+/*
+ * ACPI add callback: discover the IRQ and I/O range from _CRS, register
+ * the input device, claim the I/O region, reset the device and install the
+ * shared interrupt handler.
+ *
+ * Returns 0 on success; -EINVAL for a NULL @adev; -ENODEV if the resources
+ * cannot be determined; -EBUSY if the I/O region is taken; otherwise the
+ * error from input device setup or request_irq(). Everything acquired
+ * before a failure is released again.
+ */
+static int __devinit acpi_fujitsu_add(struct acpi_device *adev)
+{
+       acpi_status status;
+       int error;
+
+       if (!adev)
+               return -EINVAL;
+
+       status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS,
+                                    fujitsu_walk_resources, NULL);
+       if (ACPI_FAILURE(status) || !fujitsu.irq || !fujitsu.io_base)
+               return -ENODEV;
+
+       sprintf(acpi_device_name(adev), "Fujitsu %s", acpi_device_hid(adev));
+       sprintf(acpi_device_class(adev), "%s", ACPI_FUJITSU_CLASS);
+
+       snprintf(fujitsu.phys, sizeof(fujitsu.phys),
+                "%s/input0", acpi_device_hid(adev));
+
+       error = input_fujitsu_setup(&adev->dev,
+                                   acpi_device_name(adev), fujitsu.phys);
+       if (error)
+               return error;
+
+       if (!request_region(fujitsu.io_base, fujitsu.io_length, MODULENAME)) {
+               error = -EBUSY;
+               goto err_remove_input;
+       }
+
+       fujitsu_reset();
+
+       /* The handler's own address is the dev_id, as in free_irq(). */
+       error = request_irq(fujitsu.irq, fujitsu_interrupt,
+                           IRQF_SHARED, MODULENAME, fujitsu_interrupt);
+       if (error)
+               goto err_release_region;
+
+       return 0;
+
+err_release_region:
+       release_region(fujitsu.io_base, fujitsu.io_length);
+err_remove_input:
+       input_fujitsu_remove();
+       return error;
+}
+
+/*
+ * ACPI remove callback: release what acpi_fujitsu_add() acquired, in
+ * reverse order (IRQ, I/O region, input device). Always returns 0.
+ */
+static int __devexit acpi_fujitsu_remove(struct acpi_device *adev, int type)
+{
+       free_irq(fujitsu.irq, fujitsu_interrupt);
+       release_region(fujitsu.io_base, fujitsu.io_length);
+       input_fujitsu_remove();
+       return 0;
+}
+
+/*
+ * ACPI resume callback: run fujitsu_reset(), which also reports the
+ * current switch state again. Always returns 0.
+ */
+static int acpi_fujitsu_resume(struct acpi_device *adev)
+{
+       fujitsu_reset();
+       return 0;
+}
+
+/* ACPI driver bound to the IDs in fujitsu_ids. */
+static struct acpi_driver acpi_fujitsu_driver = {
+       .name  = MODULENAME,
+       .class = "hotkey",
+       .ids   = fujitsu_ids,
+       .ops   = {
+               .add    = acpi_fujitsu_add,
+               .remove = acpi_fujitsu_remove,
+               .resume = acpi_fujitsu_resume,
+       }
+};
+
+/*
+ * Module init: run the DMI callbacks to select keymap and quirks, then
+ * register the ACPI driver. Returns the result of the registration.
+ */
+static int __init fujitsu_module_init(void)
+{
+       dmi_check_system(dmi_ids);
+
+       return acpi_bus_register_driver(&acpi_fujitsu_driver);
+}
+
+/* Module exit: unregister the ACPI driver. */
+static void __exit fujitsu_module_exit(void)
+{
+       acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+}
+
+module_init(fujitsu_module_init);
+module_exit(fujitsu_module_exit);
+
+MODULE_AUTHOR("Robert Gerlach <khnz@gmx.de>");
+MODULE_DESCRIPTION("Fujitsu tablet pc extras driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("2.4");
+
+MODULE_DEVICE_TABLE(acpi, fujitsu_ids);
index 05be30e..ffff8b4 100644 (file)
@@ -562,8 +562,8 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
 
        num_sifr = acpi_pcc_get_sqty(device);
 
-       if (num_sifr > 255) {
-               ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "num_sifr too large"));
+       if (num_sifr < 0 || num_sifr > 255) {
+               ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "num_sifr out of range"));
                return -ENODEV;
        }
 
index 2baadd2..98fbe62 100644 (file)
@@ -369,9 +369,9 @@ static int __init pps_init(void)
        int err;
 
        pps_class = class_create(THIS_MODULE, "pps");
-       if (!pps_class) {
+       if (IS_ERR(pps_class)) {
                pr_err("failed to allocate class\n");
-               return -ENOMEM;
+               return PTR_ERR(pps_class);
        }
        pps_class->dev_attrs = pps_attrs;
 
index 691b1ab..30d2072 100644 (file)
@@ -410,13 +410,14 @@ static void tsi721_db_dpc(struct work_struct *work)
         */
        mport = priv->mport;
 
-       wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE));
-       rd_ptr = ioread32(priv->regs + TSI721_IDQ_RP(IDB_QUEUE));
+       wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE;
+       rd_ptr = ioread32(priv->regs + TSI721_IDQ_RP(IDB_QUEUE)) % IDB_QSIZE;
 
        while (wr_ptr != rd_ptr) {
                idb_entry = (u64 *)(priv->idb_base +
                                        (TSI721_IDB_ENTRY_SIZE * rd_ptr));
                rd_ptr++;
+               rd_ptr %= IDB_QSIZE;
                idb.msg = *idb_entry;
                *idb_entry = 0;
 
index 822e54c..1c226b3 100644 (file)
 
 #define TSI721_IDB_ENTRY_SIZE  64
 
-#define TSI721_IDQ_CTL(x)      (0x20000 + (x) * 1000)
+#define TSI721_IDQ_CTL(x)      (0x20000 + (x) * 0x1000)
 #define TSI721_IDQ_SUSPEND     0x00000002
 #define TSI721_IDQ_INIT                0x00000001
 
-#define TSI721_IDQ_STS(x)      (0x20004 + (x) * 1000)
+#define TSI721_IDQ_STS(x)      (0x20004 + (x) * 0x1000)
 #define TSI721_IDQ_RUN         0x00200000
 
-#define TSI721_IDQ_MASK(x)     (0x20008 + (x) * 1000)
+#define TSI721_IDQ_MASK(x)     (0x20008 + (x) * 0x1000)
 #define TSI721_IDQ_MASK_MASK   0xffff0000
 #define TSI721_IDQ_MASK_PATT   0x0000ffff
 
-#define TSI721_IDQ_RP(x)       (0x2000c + (x) * 1000)
+#define TSI721_IDQ_RP(x)       (0x2000c + (x) * 0x1000)
 #define TSI721_IDQ_RP_PTR      0x0007ffff
 
-#define TSI721_IDQ_WP(x)       (0x20010 + (x) * 1000)
+#define TSI721_IDQ_WP(x)       (0x20010 + (x) * 0x1000)
 #define TSI721_IDQ_WP_PTR      0x0007ffff
 
-#define TSI721_IDQ_BASEL(x)    (0x20014 + (x) * 1000)
+#define TSI721_IDQ_BASEL(x)    (0x20014 + (x) * 0x1000)
 #define TSI721_IDQ_BASEL_ADDR  0xffffffc0
-#define TSI721_IDQ_BASEU(x)    (0x20018 + (x) * 1000)
-#define TSI721_IDQ_SIZE(x)     (0x2001c + (x) * 1000)
+#define TSI721_IDQ_BASEU(x)    (0x20018 + (x) * 0x1000)
+#define TSI721_IDQ_SIZE(x)     (0x2001c + (x) * 0x1000)
 #define TSI721_IDQ_SIZE_VAL(size)      (__fls(size) - 4)
 #define TSI721_IDQ_SIZE_MIN    512
 #define TSI721_IDQ_SIZE_MAX    (512 * 1024)
 
-#define TSI721_SR_CHINT(x)     (0x20040 + (x) * 1000)
-#define TSI721_SR_CHINTE(x)    (0x20044 + (x) * 1000)
-#define TSI721_SR_CHINTSET(x)  (0x20048 + (x) * 1000)
+#define TSI721_SR_CHINT(x)     (0x20040 + (x) * 0x1000)
+#define TSI721_SR_CHINTE(x)    (0x20044 + (x) * 0x1000)
+#define TSI721_SR_CHINTSET(x)  (0x20048 + (x) * 0x1000)
 #define TSI721_SR_CHINT_ODBOK  0x00000020
 #define TSI721_SR_CHINT_IDBQRCV        0x00000010
 #define TSI721_SR_CHINT_SUSP   0x00000008
 
 #define TSI721_IBWIN_NUM       8
 
-#define TSI721_IBWINLB(x)      (0x29000 + (x) * 20)
+#define TSI721_IBWINLB(x)      (0x29000 + (x) * 0x20)
 #define TSI721_IBWINLB_BA      0xfffff000
 #define TSI721_IBWINLB_WEN     0x00000001
 
  */
 #define TSI721_OBWIN_NUM       TSI721_PC2SR_WINS
 
-#define TSI721_OBWINLB(x)      (0x40000 + (x) * 20)
+#define TSI721_OBWINLB(x)      (0x40000 + (x) * 0x20)
 #define TSI721_OBWINLB_BA      0xffff8000
 #define TSI721_OBWINLB_WEN     0x00000001
 
-#define TSI721_OBWINUB(x)      (0x40004 + (x) * 20)
+#define TSI721_OBWINUB(x)      (0x40004 + (x) * 0x20)
 
-#define TSI721_OBWINSZ(x)      (0x40008 + (x) * 20)
+#define TSI721_OBWINSZ(x)      (0x40008 + (x) * 0x20)
 #define TSI721_OBWINSZ_SIZE    0x00001f00
 #define TSI721_OBWIN_SIZE(size)        (__fls(size) - 15)
 
index 3767364..09915e8 100644 (file)
@@ -226,7 +226,7 @@ static int da9052_regulator_set_voltage_int(struct regulator_dev *rdev,
        if (min_uV < info->min_uV)
                min_uV = info->min_uV;
 
-       *selector = (min_uV - info->min_uV) / info->step_uV;
+       *selector = DIV_ROUND_UP(min_uV - info->min_uV, info->step_uV);
 
        ret = da9052_list_voltage(rdev, *selector);
        if (ret < 0)
@@ -260,8 +260,8 @@ static int da9052_set_ldo5_6_voltage(struct regulator_dev *rdev,
         * the LDO activate bit to implment the changes on the
         * LDO output.
        */
-       return da9052_reg_update(regulator->da9052, DA9052_SUPPLY_REG, 0,
-                                info->activate_bit);
+       return da9052_reg_update(regulator->da9052, DA9052_SUPPLY_REG,
+                                info->activate_bit, info->activate_bit);
 }
 
 static int da9052_set_dcdc_voltage(struct regulator_dev *rdev,
@@ -280,8 +280,8 @@ static int da9052_set_dcdc_voltage(struct regulator_dev *rdev,
         * the DCDC activate bit to implment the changes on the
         * DCDC output.
        */
-       return da9052_reg_update(regulator->da9052, DA9052_SUPPLY_REG, 0,
-                                info->activate_bit);
+       return da9052_reg_update(regulator->da9052, DA9052_SUPPLY_REG,
+                                info->activate_bit, info->activate_bit);
 }
 
 static int da9052_get_regulator_voltage_sel(struct regulator_dev *rdev)
@@ -318,10 +318,10 @@ static int da9052_set_buckperi_voltage(struct regulator_dev *rdev, int min_uV,
        if ((regulator->da9052->chip_id == DA9052) &&
            (min_uV >= DA9052_CONST_3uV))
                *selector = DA9052_BUCK_PERI_REG_MAP_UPTO_3uV +
-                           ((min_uV - DA9052_CONST_3uV) /
-                           (DA9052_BUCK_PERI_3uV_STEP));
+                           DIV_ROUND_UP(min_uV - DA9052_CONST_3uV,
+                                        DA9052_BUCK_PERI_3uV_STEP);
        else
-               *selector = (min_uV - info->min_uV) / info->step_uV;
+               *selector = DIV_ROUND_UP(min_uV - info->min_uV, info->step_uV);
 
        ret = da9052_list_buckperi_voltage(rdev, *selector);
        if (ret < 0)
@@ -400,6 +400,7 @@ static struct regulator_ops da9052_ldo_ops = {
                .ops = &da9052_ldo5_6_ops,\
                .type = REGULATOR_VOLTAGE,\
                .id = _id,\
+               .n_voltages = (max - min) / step + 1, \
                .owner = THIS_MODULE,\
        },\
        .min_uV = (min) * 1000,\
@@ -417,6 +418,7 @@ static struct regulator_ops da9052_ldo_ops = {
                .ops = &da9052_ldo_ops,\
                .type = REGULATOR_VOLTAGE,\
                .id = _id,\
+               .n_voltages = (max - min) / step + 1, \
                .owner = THIS_MODULE,\
        },\
        .min_uV = (min) * 1000,\
@@ -434,6 +436,7 @@ static struct regulator_ops da9052_ldo_ops = {
                .ops = &da9052_dcdc_ops,\
                .type = REGULATOR_VOLTAGE,\
                .id = _id,\
+               .n_voltages = (max - min) / step + 1, \
                .owner = THIS_MODULE,\
        },\
        .min_uV = (min) * 1000,\
@@ -451,6 +454,7 @@ static struct regulator_ops da9052_ldo_ops = {
                .ops = &da9052_buckperi_ops,\
                .type = REGULATOR_VOLTAGE,\
                .id = _id,\
+               .n_voltages = (max - min) / step + 1, \
                .owner = THIS_MODULE,\
        },\
        .min_uV = (min) * 1000,\
index 70b7b1f..2e94686 100644 (file)
@@ -481,7 +481,7 @@ static int set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV,
        if (i >= info->n_voltages)
                i = info->n_voltages - 1;
 
-       *selector = info->voltages[i];
+       *selector = i;
 
        return write_field(hw, &info->voltage, i);
 }
index 5c15ba0..40ecf51 100644 (file)
@@ -662,7 +662,7 @@ static int tps65910_set_voltage_dcdc(struct regulator_dev *dev,
                tps65910_reg_write(pmic, TPS65910_VDD2_OP, vsel);
                break;
        case TPS65911_REG_VDDCTRL:
-               vsel = selector;
+               vsel = selector + 3;
                tps65910_reg_write(pmic, TPS65911_VDDCTRL_OP, vsel);
        }
 
index 8a1c031..dc87eda 100644 (file)
@@ -73,6 +73,8 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
                err = -EINVAL;
 
        mutex_unlock(&rtc->ops_lock);
+       /* A timer might have just expired */
+       schedule_work(&rtc->irqwork);
        return err;
 }
 EXPORT_SYMBOL_GPL(rtc_set_time);
@@ -112,6 +114,8 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
                err = -EINVAL;
 
        mutex_unlock(&rtc->ops_lock);
+       /* A timer might have just expired */
+       schedule_work(&rtc->irqwork);
 
        return err;
 }
@@ -380,18 +384,27 @@ EXPORT_SYMBOL_GPL(rtc_set_alarm);
 int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 {
        int err;
+       struct rtc_time now;
 
        err = rtc_valid_tm(&alarm->time);
        if (err != 0)
                return err;
 
+       err = rtc_read_time(rtc, &now);
+       if (err)
+               return err;
+
        err = mutex_lock_interruptible(&rtc->ops_lock);
        if (err)
                return err;
 
        rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time);
        rtc->aie_timer.period = ktime_set(0, 0);
-       if (alarm->enabled) {
+
+       /* Alarm has to be enabled & in the future for us to enqueue it */
+       if (alarm->enabled && (rtc_tm_to_ktime(now).tv64 <
+                        rtc->aie_timer.node.expires.tv64)) {
+
                rtc->aie_timer.enabled = 1;
                timerqueue_add(&rtc->timerqueue, &rtc->aie_timer.node);
        }
@@ -763,6 +776,14 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
        return 0;
 }
 
+static void rtc_alarm_disable(struct rtc_device *rtc)
+{
+       if (!rtc->ops || !rtc->ops->alarm_irq_enable)
+               return;
+
+       rtc->ops->alarm_irq_enable(rtc->dev.parent, false);
+}
+
 /**
  * rtc_timer_remove - Removes a rtc_timer from the rtc_device timerqueue
  * @rtc rtc device
@@ -784,8 +805,10 @@ static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer)
                struct rtc_wkalrm alarm;
                int err;
                next = timerqueue_getnext(&rtc->timerqueue);
-               if (!next)
+               if (!next) {
+                       rtc_alarm_disable(rtc);
                        return;
+               }
                alarm.time = rtc_ktime_to_tm(next->expires);
                alarm.enabled = 1;
                err = __rtc_set_alarm(rtc, &alarm);
@@ -847,7 +870,8 @@ again:
                err = __rtc_set_alarm(rtc, &alarm);
                if (err == -ETIME)
                        goto again;
-       }
+       } else
+               rtc_alarm_disable(rtc);
 
        mutex_unlock(&rtc->ops_lock);
 }
index 9beba49..2853c2a 100644 (file)
@@ -125,6 +125,13 @@ static int __devinit r9701_probe(struct spi_device *spi)
        unsigned char tmp;
        int res;
 
+       tmp = R100CNT;
+       res = read_regs(&spi->dev, &tmp, 1);
+       if (res || tmp != 0x20) {
+               dev_err(&spi->dev, "cannot read RTC register\n");
+               return -ENODEV;
+       }
+
        rtc = rtc_device_register("r9701",
                                &spi->dev, &r9701_rtc_ops, THIS_MODULE);
        if (IS_ERR(rtc))
@@ -132,13 +139,6 @@ static int __devinit r9701_probe(struct spi_device *spi)
 
        dev_set_drvdata(&spi->dev, rtc);
 
-       tmp = R100CNT;
-       res = read_regs(&spi->dev, &tmp, 1);
-       if (res || tmp != 0x20) {
-               rtc_device_unregister(rtc);
-               return res;
-       }
-
        return 0;
 }
 
index 3ef8d07..770a740 100644 (file)
@@ -167,7 +167,7 @@ again:
        DBF_ERROR("%4x EQBS ERROR", SCH_NO(q));
        DBF_ERROR("%3d%3d%2d", count, tmp_count, nr);
        q->handler(q->irq_ptr->cdev, QDIO_ERROR_ACTIVATE_CHECK_CONDITION,
-                  0, -1, -1, q->irq_ptr->int_parm);
+                  q->nr, q->first_to_kick, count, q->irq_ptr->int_parm);
        return 0;
 }
 
@@ -215,7 +215,7 @@ again:
        DBF_ERROR("%4x SQBS ERROR", SCH_NO(q));
        DBF_ERROR("%3d%3d%2d", count, tmp_count, nr);
        q->handler(q->irq_ptr->cdev, QDIO_ERROR_ACTIVATE_CHECK_CONDITION,
-                  0, -1, -1, q->irq_ptr->int_parm);
+                  q->nr, q->first_to_kick, count, q->irq_ptr->int_parm);
        return 0;
 }
 
index 0cb39ff..f8fb2d6 100644 (file)
@@ -408,7 +408,7 @@ int sd_dif_prepare(struct request *rq, sector_t hw_sector, unsigned int sector_s
                        kunmap_atomic(sdt, KM_USER0);
                }
 
-               bio->bi_flags |= BIO_MAPPED_INTEGRITY;
+               bio->bi_flags |= (1 << BIO_MAPPED_INTEGRITY);
        }
 
        return 0;
index 2f9cb43..f37ad22 100644 (file)
@@ -1083,7 +1083,7 @@ err_alloc_rx_sg:
        return -ENOMEM;
 }
 
-static int __init pl022_dma_probe(struct pl022 *pl022)
+static int __devinit pl022_dma_probe(struct pl022 *pl022)
 {
        dma_cap_mask_t mask;
 
index 4426290..501b27c 100644 (file)
@@ -1028,7 +1028,7 @@ done:
                return iscsit_add_reject_from_cmd(
                                ISCSI_REASON_BOOKMARK_NO_RESOURCES,
                                1, 1, buf, cmd);
-       } else if (transport_ret == -EINVAL) {
+       } else if (transport_ret < 0) {
                /*
                 * Unsupported SAM Opcode.  CHECK_CONDITION will be sent
                 * in iscsit_execute_cmd() during the CmdSN OOO Execution
index b7c7793..63e703b 100644 (file)
@@ -117,7 +117,7 @@ static struct t10_pr_registration *core_scsi3_locate_pr_reg(struct se_device *,
                                        struct se_node_acl *, struct se_session *);
 static void core_scsi3_put_pr_reg(struct t10_pr_registration *);
 
-static int target_check_scsi2_reservation_conflict(struct se_cmd *cmd, int *ret)
+static int target_check_scsi2_reservation_conflict(struct se_cmd *cmd)
 {
        struct se_session *se_sess = cmd->se_sess;
        struct se_subsystem_dev *su_dev = cmd->se_dev->se_sub_dev;
@@ -127,7 +127,7 @@ static int target_check_scsi2_reservation_conflict(struct se_cmd *cmd, int *ret)
        int conflict = 0;
 
        if (!crh)
-               return false;
+               return -EINVAL;
 
        pr_reg = core_scsi3_locate_pr_reg(cmd->se_dev, se_sess->se_node_acl,
                        se_sess);
@@ -155,16 +155,14 @@ static int target_check_scsi2_reservation_conflict(struct se_cmd *cmd, int *ret)
                 */
                if (pr_reg->pr_res_holder) {
                        core_scsi3_put_pr_reg(pr_reg);
-                       *ret = 0;
-                       return false;
+                       return 1;
                }
                if ((pr_reg->pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_REGONLY) ||
                    (pr_reg->pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_REGONLY) ||
                    (pr_reg->pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG) ||
                    (pr_reg->pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG)) {
                        core_scsi3_put_pr_reg(pr_reg);
-                       *ret = 0;
-                       return true;
+                       return 1;
                }
                core_scsi3_put_pr_reg(pr_reg);
                conflict = 1;
@@ -189,10 +187,10 @@ static int target_check_scsi2_reservation_conflict(struct se_cmd *cmd, int *ret)
                        " while active SPC-3 registrations exist,"
                        " returning RESERVATION_CONFLICT\n");
                cmd->scsi_sense_reason = TCM_RESERVATION_CONFLICT;
-               return true;
+               return -EBUSY;
        }
 
-       return false;
+       return 0;
 }
 
 int target_scsi2_reservation_release(struct se_task *task)
@@ -201,12 +199,18 @@ int target_scsi2_reservation_release(struct se_task *task)
        struct se_device *dev = cmd->se_dev;
        struct se_session *sess = cmd->se_sess;
        struct se_portal_group *tpg = sess->se_tpg;
-       int ret = 0;
+       int ret = 0, rc;
 
        if (!sess || !tpg)
                goto out;
-       if (target_check_scsi2_reservation_conflict(cmd, &ret))
+       rc = target_check_scsi2_reservation_conflict(cmd);
+       if (rc == 1)
+               goto out;
+       else if (rc < 0) {
+               cmd->scsi_sense_reason = TCM_RESERVATION_CONFLICT;
+               ret = -EINVAL;
                goto out;
+       }
 
        ret = 0;
        spin_lock(&dev->dev_reservation_lock);
@@ -243,7 +247,7 @@ int target_scsi2_reservation_reserve(struct se_task *task)
        struct se_device *dev = cmd->se_dev;
        struct se_session *sess = cmd->se_sess;
        struct se_portal_group *tpg = sess->se_tpg;
-       int ret = 0;
+       int ret = 0, rc;
 
        if ((cmd->t_task_cdb[1] & 0x01) &&
            (cmd->t_task_cdb[1] & 0x02)) {
@@ -259,8 +263,14 @@ int target_scsi2_reservation_reserve(struct se_task *task)
         */
        if (!sess || !tpg)
                goto out;
-       if (target_check_scsi2_reservation_conflict(cmd, &ret))
+       rc = target_check_scsi2_reservation_conflict(cmd);
+       if (rc == 1)
                goto out;
+       else if (rc < 0) {
+               cmd->scsi_sense_reason = TCM_RESERVATION_CONFLICT;
+               ret = -EINVAL;
+               goto out;
+       }
 
        ret = 0;
        spin_lock(&dev->dev_reservation_lock);
index 58cea07..cd5cd95 100644 (file)
@@ -2539,6 +2539,7 @@ static int transport_generic_cmd_sequencer(
                                        cmd, cdb, pr_reg_type) != 0) {
                        cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
                        cmd->se_cmd_flags |= SCF_SCSI_RESERVATION_CONFLICT;
+                       cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT;
                        cmd->scsi_sense_reason = TCM_RESERVATION_CONFLICT;
                        return -EBUSY;
                }
index 4c0507c..eff512b 100644 (file)
@@ -86,16 +86,6 @@ static struct ft_tport *ft_tport_create(struct fc_lport *lport)
 }
 
 /*
- * Free tport via RCU.
- */
-static void ft_tport_rcu_free(struct rcu_head *rcu)
-{
-       struct ft_tport *tport = container_of(rcu, struct ft_tport, rcu);
-
-       kfree(tport);
-}
-
-/*
  * Delete a target local port.
  * Caller holds ft_lport_lock.
  */
@@ -114,7 +104,7 @@ static void ft_tport_delete(struct ft_tport *tport)
                tpg->tport = NULL;
                tport->tpg = NULL;
        }
-       call_rcu(&tport->rcu, ft_tport_rcu_free);
+       kfree_rcu(tport, rcu);
 }
 
 /*
index b3d1741..830cd62 100644 (file)
@@ -365,7 +365,7 @@ config PPC_EPAPR_HV_BYTECHAN
 
 config PPC_EARLY_DEBUG_EHV_BC
        bool "Early console (udbg) support for ePAPR hypervisors"
-       depends on PPC_EPAPR_HV_BYTECHAN
+       depends on PPC_EPAPR_HV_BYTECHAN=y
        help
          Select this option to enable early console (a.k.a. "udbg") support
          via an ePAPR byte channel.  You also need to choose the byte channel
index 7508579..61b7fd2 100644 (file)
@@ -1710,6 +1710,8 @@ static int sci_startup(struct uart_port *port)
 
        dev_dbg(port->dev, "%s(%d)\n", __func__, port->line);
 
+       pm_runtime_put_noidle(port->dev);
+
        sci_port_enable(s);
 
        ret = sci_request_irq(s);
@@ -1737,6 +1739,8 @@ static void sci_shutdown(struct uart_port *port)
        sci_free_irq(s);
 
        sci_port_disable(s);
+
+       pm_runtime_get_noresume(port->dev);
 }
 
 static unsigned int sci_scbrr_calc(unsigned int algo_id, unsigned int bps,
@@ -2075,6 +2079,7 @@ static int __devinit sci_init_single(struct platform_device *dev,
                sci_init_gpios(sci_port);
 
                pm_runtime_irq_safe(&dev->dev);
+               pm_runtime_get_noresume(&dev->dev);
                pm_runtime_enable(&dev->dev);
        }
 
index c26a82e..b556a72 100644 (file)
@@ -239,7 +239,7 @@ static void ehci_fsl_setup_phy(struct ehci_hcd *ehci,
        ehci_writel(ehci, portsc, &ehci->regs->port_status[port_offset]);
 }
 
-static int ehci_fsl_usb_setup(struct ehci_hcd *ehci)
+static void ehci_fsl_usb_setup(struct ehci_hcd *ehci)
 {
        struct usb_hcd *hcd = ehci_to_hcd(ehci);
        struct fsl_usb2_platform_data *pdata;
@@ -299,19 +299,12 @@ static int ehci_fsl_usb_setup(struct ehci_hcd *ehci)
 #endif
                out_be32(non_ehci + FSL_SOC_USB_SICTRL, 0x00000001);
        }
-
-       if (!(in_be32(non_ehci + FSL_SOC_USB_CTRL) & CTRL_PHY_CLK_VALID)) {
-               printk(KERN_WARNING "fsl-ehci: USB PHY clock invalid\n");
-               return -ENODEV;
-       }
-       return 0;
 }
 
 /* called after powerup, by probe or system-pm "wakeup" */
 static int ehci_fsl_reinit(struct ehci_hcd *ehci)
 {
-       if (ehci_fsl_usb_setup(ehci))
-               return -ENODEV;
+       ehci_fsl_usb_setup(ehci);
        ehci_port_power(ehci, 0);
 
        return 0;
index bdf43e2..4918062 100644 (file)
@@ -45,6 +45,5 @@
 #define FSL_SOC_USB_PRICTRL    0x40c   /* NOTE: big-endian */
 #define FSL_SOC_USB_SICTRL     0x410   /* NOTE: big-endian */
 #define FSL_SOC_USB_CTRL       0x500   /* NOTE: big-endian */
-#define CTRL_PHY_CLK_VALID     (1 << 17)
 #define SNOOP_SIZE_2GB         0x1e
 #endif                         /* _EHCI_FSL_H */
index e132157..516db70 100644 (file)
@@ -690,7 +690,7 @@ static ssize_t s6e63m0_sysfs_store_gamma_mode(struct device *dev,
        struct backlight_device *bd = NULL;
        int brightness, rc;
 
-       rc = strict_strtoul(buf, 0, (unsigned long *)&lcd->gamma_mode);
+       rc = kstrtouint(buf, 0, &lcd->gamma_mode);
        if (rc < 0)
                return rc;
 
index d621f02..aa19526 100644 (file)
@@ -4,6 +4,10 @@
 
 menu "File systems"
 
+# Use unaligned word dcache accesses
+config DCACHE_WORD_ACCESS
+       bool
+
 if BLOCK
 
 source "fs/ext2/Kconfig"
index 14d89fa..8f6e923 100644 (file)
@@ -251,7 +251,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
        ASSERT(key != NULL);
 
        vnode = AFS_FS_I(mapping->host);
-       if (vnode->flags & AFS_VNODE_DELETED) {
+       if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
                _leave(" = -ESTALE");
                return -ESTALE;
        }
index d2b0888..a306bb6 100644 (file)
@@ -109,7 +109,7 @@ struct afs_call {
        unsigned                reply_size;     /* current size of reply */
        unsigned                first_offset;   /* offset into mapping[first] */
        unsigned                last_to;        /* amount of mapping[last] */
-       unsigned short          offset;         /* offset into received data store */
+       unsigned                offset;         /* offset into received data store */
        unsigned char           unmarshall;     /* unmarshalling phase */
        bool                    incoming;       /* T if incoming call */
        bool                    send_pages;     /* T if data from mapping should be sent */
index e45a323..8ad8c2a 100644 (file)
@@ -314,6 +314,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
        struct msghdr msg;
        struct kvec iov[1];
        int ret;
+       struct sk_buff *skb;
 
        _enter("%x,{%d},", addr->s_addr, ntohs(call->port));
 
@@ -380,6 +381,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 
 error_do_abort:
        rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
+       while ((skb = skb_dequeue(&call->rx_queue)))
+               afs_free_skb(skb);
        rxrpc_kernel_end_call(rxcall);
        call->rxcall = NULL;
 error_kill_call:
index 969beb0..b9d64d8 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -228,12 +228,6 @@ static void __put_ioctx(struct kioctx *ctx)
        call_rcu(&ctx->rcu_head, ctx_rcu_free);
 }
 
-static inline void get_ioctx(struct kioctx *kioctx)
-{
-       BUG_ON(atomic_read(&kioctx->users) <= 0);
-       atomic_inc(&kioctx->users);
-}
-
 static inline int try_get_ioctx(struct kioctx *kioctx)
 {
        return atomic_inc_not_zero(&kioctx->users);
@@ -273,7 +267,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
        mm = ctx->mm = current->mm;
        atomic_inc(&mm->mm_count);
 
-       atomic_set(&ctx->users, 1);
+       atomic_set(&ctx->users, 2);
        spin_lock_init(&ctx->ctx_lock);
        spin_lock_init(&ctx->ring_info.ring_lock);
        init_waitqueue_head(&ctx->wait);
@@ -490,6 +484,8 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)
                kmem_cache_free(kiocb_cachep, req);
                ctx->reqs_active--;
        }
+       if (unlikely(!ctx->reqs_active && ctx->dead))
+               wake_up_all(&ctx->wait);
        spin_unlock_irq(&ctx->ctx_lock);
 }
 
@@ -607,11 +603,16 @@ static void aio_fput_routine(struct work_struct *data)
                        fput(req->ki_filp);
 
                /* Link the iocb into the context's free list */
+               rcu_read_lock();
                spin_lock_irq(&ctx->ctx_lock);
                really_put_req(ctx, req);
+               /*
+                * at that point ctx might've been killed, but actual
+                * freeing is RCU'd
+                */
                spin_unlock_irq(&ctx->ctx_lock);
+               rcu_read_unlock();
 
-               put_ioctx(ctx);
                spin_lock_irq(&fput_lock);
        }
        spin_unlock_irq(&fput_lock);
@@ -642,7 +643,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
         * this function will be executed w/out any aio kthread wakeup.
         */
        if (unlikely(!fput_atomic(req->ki_filp))) {
-               get_ioctx(ctx);
                spin_lock(&fput_lock);
                list_add(&req->ki_list, &fput_head);
                spin_unlock(&fput_lock);
@@ -1336,10 +1336,10 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
        ret = PTR_ERR(ioctx);
        if (!IS_ERR(ioctx)) {
                ret = put_user(ioctx->user_id, ctxp);
-               if (!ret)
+               if (!ret) {
+                       put_ioctx(ioctx);
                        return 0;
-
-               get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
+               }
                io_destroy(ioctx);
        }
 
index a6395bd..1ff9405 100644 (file)
@@ -259,6 +259,13 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
 
+       retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
+       if (retval < 0) {
+               /* Someone check-me: is this error path enough? */
+               send_sig(SIGKILL, current, 0);
+               return retval;
+       }
+
        install_exec_creds(bprm);
        current->flags &= ~PF_FORKNOEXEC;
 
@@ -352,13 +359,6 @@ beyond_if:
                return retval;
        }
 
-       retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
-       if (retval < 0) { 
-               /* Someone check-me: is this error path enough? */ 
-               send_sig(SIGKILL, current, 0); 
-               return retval;
-       }
-
        current->mm->start_stack =
                (unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
 #ifdef __alpha__
index 0e575d1..5e9f198 100644 (file)
@@ -1183,8 +1183,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                         * The latter is necessary to prevent ghost
                         * partitions on a removed medium.
                         */
-                       if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
-                               rescan_partitions(disk, bdev);
+                       if (bdev->bd_invalidated) {
+                               if (!ret)
+                                       rescan_partitions(disk, bdev);
+                               else if (ret == -ENOMEDIUM)
+                                       invalidate_partitions(disk, bdev);
+                       }
                        if (ret)
                                goto out_clear;
                } else {
@@ -1214,8 +1218,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                        if (bdev->bd_disk->fops->open)
                                ret = bdev->bd_disk->fops->open(bdev, mode);
                        /* the same as first opener case, read comment there */
-                       if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
-                               rescan_partitions(bdev->bd_disk, bdev);
+                       if (bdev->bd_invalidated) {
+                               if (!ret)
+                                       rescan_partitions(bdev->bd_disk, bdev);
+                               else if (ret == -ENOMEDIUM)
+                                       invalidate_partitions(bdev->bd_disk, bdev);
+                       }
                        if (ret)
                                goto out_unlock_bdev;
                }
index 98f6bf1..0436c12 100644 (file)
@@ -583,7 +583,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
        struct btrfs_path *path;
        struct btrfs_key info_key = { 0 };
        struct btrfs_delayed_ref_root *delayed_refs = NULL;
-       struct btrfs_delayed_ref_head *head = NULL;
+       struct btrfs_delayed_ref_head *head;
        int info_level = 0;
        int ret;
        struct list_head prefs_delayed;
@@ -607,6 +607,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
         * at a specified point in time
         */
 again:
+       head = NULL;
+
        ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
@@ -635,8 +637,10 @@ again:
                        goto again;
                }
                ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed);
-               if (ret)
+               if (ret) {
+                       spin_unlock(&delayed_refs->lock);
                        goto out;
+               }
        }
        spin_unlock(&delayed_refs->lock);
 
index 2373b39..22db045 100644 (file)
@@ -305,7 +305,7 @@ again:
 
        spin_lock(&fs_info->reada_lock);
        ret = radix_tree_insert(&dev->reada_zones,
-                               (unsigned long)zone->end >> PAGE_CACHE_SHIFT,
+                               (unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
                                zone);
        spin_unlock(&fs_info->reada_lock);
 
index 63a196b..bc7e244 100644 (file)
@@ -584,10 +584,26 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
                         * If either that or op not supported returned, follow
                         * the normal lookup.
                         */
-                       if ((rc == 0) || (rc == -ENOENT))
+                       switch (rc) {
+                       case 0:
+                               /*
+                                * The server may allow us to open things like
+                                * FIFOs, but the client isn't set up to deal
+                                * with that. If it's not a regular file, just
+                                * close it and proceed as if it were a normal
+                                * lookup.
+                                */
+                               if (newInode && !S_ISREG(newInode->i_mode)) {
+                                       CIFSSMBClose(xid, pTcon, fileHandle);
+                                       break;
+                               }
+                       case -ENOENT:
                                posix_open = true;
-                       else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP))
+                       case -EOPNOTSUPP:
+                               break;
+                       default:
                                pTcon->broken_posix_open = true;
+                       }
                }
                if (!posix_open)
                        rc = cifs_get_inode_info_unix(&newInode, full_path,
index 4dd9283..5e64748 100644 (file)
@@ -920,16 +920,26 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)
 
+struct lock_to_push {
+       struct list_head llist;
+       __u64 offset;
+       __u64 length;
+       __u32 pid;
+       __u16 netfid;
+       __u8 type;
+};
+
 static int
 cifs_push_posix_locks(struct cifsFileInfo *cfile)
 {
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
-       struct cifsLockInfo *lck, *tmp;
+       unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
+       struct list_head locks_to_send, *el;
+       struct lock_to_push *lck, *tmp;
        __u64 length;
-       struct list_head locks_to_send;
 
        xid = GetXid();
 
@@ -940,29 +950,55 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
                return rc;
        }
 
+       lock_flocks();
+       cifs_for_each_lock(cfile->dentry->d_inode, before) {
+               if ((*before)->fl_flags & FL_POSIX)
+                       count++;
+       }
+       unlock_flocks();
+
        INIT_LIST_HEAD(&locks_to_send);
 
+       /*
+        * Allocating count locks is enough because no locks can be added to
+        * the list while we are holding cinode->lock_mutex that protects
+        * locking operations of this inode.
+        */
+       for (; i < count; i++) {
+               lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
+               if (!lck) {
+                       rc = -ENOMEM;
+                       goto err_out;
+               }
+               list_add_tail(&lck->llist, &locks_to_send);
+       }
+
+       i = 0;
+       el = locks_to_send.next;
        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
+               if (el == &locks_to_send) {
+                       /* something is really wrong */
+                       cERROR(1, "Can't push all brlocks!");
+                       break;
+               }
                flock = *before;
+               if ((flock->fl_flags & FL_POSIX) == 0)
+                       continue;
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
-
-               lck = cifs_lock_init(flock->fl_start, length, type,
-                                    cfile->netfid);
-               if (!lck) {
-                       rc = -ENOMEM;
-                       goto send_locks;
-               }
+               lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
-
-               list_add_tail(&lck->llist, &locks_to_send);
+               lck->netfid = cfile->netfid;
+               lck->length = length;
+               lck->type = type;
+               lck->offset = flock->fl_start;
+               i++;
+               el = el->next;
        }
-
-send_locks:
        unlock_flocks();
 
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
@@ -979,11 +1015,18 @@ send_locks:
                kfree(lck);
        }
 
+out:
        cinode->can_cache_brlcks = false;
        mutex_unlock(&cinode->lock_mutex);
 
        FreeXid(xid);
        return rc;
+err_out:
+       list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
+               list_del(&lck->llist);
+               kfree(lck);
+       }
+       goto out;
 }
 
 static int
index a5f54b7..745da3d 100644 (file)
@@ -534,6 +534,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
        if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
                fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
                fattr->cf_dtype = DT_DIR;
+               /*
+                * Server can return wrong NumberOfLinks value for directories
+                * when Unix extensions are disabled - fake it.
+                */
+               fattr->cf_nlink = 2;
        } else {
                fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
                fattr->cf_dtype = DT_REG;
@@ -541,9 +546,9 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
                /* clear write bits if ATTR_READONLY is set */
                if (fattr->cf_cifsattrs & ATTR_READONLY)
                        fattr->cf_mode &= ~(S_IWUGO);
-       }
 
-       fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
+               fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
+       }
 
        fattr->cf_uid = cifs_sb->mnt_uid;
        fattr->cf_gid = cifs_sb->mnt_gid;
@@ -1322,7 +1327,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
                        }
 /*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need
        to set uid/gid */
-                       inc_nlink(inode);
 
                        cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
                        cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1355,7 +1359,6 @@ mkdir_retry_old:
                d_drop(direntry);
        } else {
 mkdir_get_info:
-               inc_nlink(inode);
                if (pTcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&newinode, full_path,
                                                      inode->i_sb, xid);
@@ -1436,6 +1439,11 @@ mkdir_get_info:
                }
        }
 mkdir_out:
+       /*
+        * Force revalidate to get parent dir info when needed since cached
+        * attributes are invalid now.
+        */
+       CIFS_I(inode)->time = 0;
        kfree(full_path);
        FreeXid(xid);
        cifs_put_tlink(tlink);
@@ -1475,7 +1483,6 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
        cifs_put_tlink(tlink);
 
        if (!rc) {
-               drop_nlink(inode);
                spin_lock(&direntry->d_inode->i_lock);
                i_size_write(direntry->d_inode, 0);
                clear_nlink(direntry->d_inode);
@@ -1483,12 +1490,15 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
        }
 
        cifsInode = CIFS_I(direntry->d_inode);
-       cifsInode->time = 0;    /* force revalidate to go get info when
-                                  needed */
+       /* force revalidate to go get info when needed */
+       cifsInode->time = 0;
 
        cifsInode = CIFS_I(inode);
-       cifsInode->time = 0;    /* force revalidate to get parent dir info
-                                  since cached search results now invalid */
+       /*
+        * Force revalidate to get parent dir info when needed since cached
+        * attributes are invalid now.
+        */
+       cifsInode->time = 0;
 
        direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime =
                current_fs_time(inode->i_sb);
index 45f07c4..10d92cf 100644 (file)
@@ -105,7 +105,6 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
        struct cifs_tcon *pTcon;
        struct super_block *sb;
        char *full_path;
-       struct cifs_ntsd *pacl;
 
        if (direntry == NULL)
                return -EIO;
@@ -164,23 +163,24 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
                        cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
        } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
                        strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
+#ifdef CONFIG_CIFS_ACL
+               struct cifs_ntsd *pacl;
                pacl = kmalloc(value_size, GFP_KERNEL);
                if (!pacl) {
                        cFYI(1, "%s: Can't allocate memory for ACL",
                                        __func__);
                        rc = -ENOMEM;
                } else {
-#ifdef CONFIG_CIFS_ACL
                        memcpy(pacl, ea_value, value_size);
                        rc = set_cifs_acl(pacl, value_size,
                                direntry->d_inode, full_path, CIFS_ACL_DACL);
                        if (rc == 0) /* force revalidate of the inode */
                                CIFS_I(direntry->d_inode)->time = 0;
                        kfree(pacl);
+               }
 #else
                        cFYI(1, "Set CIFS ACL not supported yet");
 #endif /* CONFIG_CIFS_ACL */
-               }
        } else {
                int temp;
                temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
index 138be96..11828de 100644 (file)
@@ -105,10 +105,10 @@ static unsigned int d_hash_shift __read_mostly;
 static struct hlist_bl_head *dentry_hashtable __read_mostly;
 
 static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
-                                       unsigned long hash)
+                                       unsigned int hash)
 {
-       hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
-       hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
+       hash += (unsigned long) parent / L1_CACHE_BYTES;
+       hash = hash + (hash >> D_HASHBITS);
        return dentry_hashtable + (hash & D_HASHMASK);
 }
 
@@ -137,6 +137,49 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 }
 #endif
 
+/*
+ * Compare 2 name strings, return 0 if they match, otherwise non-zero.
+ * The strings are both count bytes long, and count is non-zero.
+ */
+static inline int dentry_cmp(const unsigned char *cs, size_t scount,
+                               const unsigned char *ct, size_t tcount)
+{
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+       unsigned long a,b,mask;
+
+       if (unlikely(scount != tcount))
+               return 1;
+
+       for (;;) {
+               a = *(unsigned long *)cs;
+               b = *(unsigned long *)ct;
+               if (tcount < sizeof(unsigned long))
+                       break;
+               if (unlikely(a != b))
+                       return 1;
+               cs += sizeof(unsigned long);
+               ct += sizeof(unsigned long);
+               tcount -= sizeof(unsigned long);
+               if (!tcount)
+                       return 0;
+       }
+       mask = ~(~0ul << tcount*8);
+       return unlikely(!!((a ^ b) & mask));
+#else
+       if (scount != tcount)
+               return 1;
+
+       do {
+               if (*cs != *ct)
+                       return 1;
+               cs++;
+               ct++;
+               tcount--;
+       } while (tcount);
+       return 0;
+#endif
+}
+
 static void __d_free(struct rcu_head *head)
 {
        struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
index ea54cde..4d9d3a4 100644 (file)
@@ -988,6 +988,10 @@ static int path_count[PATH_ARR_SIZE];
 
 static int path_count_inc(int nests)
 {
+       /* Allow an arbitrary number of depth 1 paths */
+       if (nests == 0)
+               return 0;
+
        if (++path_count[nests] > path_limits[nests])
                return -1;
        return 0;
index 92ce83a..b0695a9 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -63,6 +63,8 @@
 #include <trace/events/task.h>
 #include "internal.h"
 
+#include <trace/events/sched.h>
+
 int core_uses_pid;
 char core_pattern[CORENAME_MAX_SIZE] = "core";
 unsigned int core_pipe_limit;
@@ -1402,9 +1404,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
                         */
                        bprm->recursion_depth = depth;
                        if (retval >= 0) {
-                               if (depth == 0)
-                                       ptrace_event(PTRACE_EVENT_EXEC,
-                                                       old_pid);
+                               if (depth == 0) {
+                                       trace_sched_process_exec(current, old_pid, bprm);
+                                       ptrace_event(PTRACE_EVENT_EXEC, old_pid);
+                               }
                                put_binfmt(fmt);
                                allow_write_access(bprm->file);
                                if (bprm->file)
@@ -1915,7 +1918,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 {
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->mm;
-       struct completion *vfork_done;
        int core_waiters = -EBUSY;
 
        init_completion(&core_state->startup);
@@ -1927,22 +1929,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
                core_waiters = zap_threads(tsk, mm, core_state, exit_code);
        up_write(&mm->mmap_sem);
 
-       if (unlikely(core_waiters < 0))
-               goto fail;
-
-       /*
-        * Make sure nobody is waiting for us to release the VM,
-        * otherwise we can deadlock when we wait on each other
-        */
-       vfork_done = tsk->vfork_done;
-       if (vfork_done) {
-               tsk->vfork_done = NULL;
-               complete(vfork_done);
-       }
-
-       if (core_waiters)
+       if (core_waiters > 0)
                wait_for_completion(&core_state->startup);
-fail:
+
        return core_waiters;
 }
 
index d3ebdbe..83ab215 100644 (file)
@@ -938,8 +938,7 @@ void lockdep_annotate_inode_mutex_key(struct inode *inode)
                struct file_system_type *type = inode->i_sb->s_type;
 
                /* Set new key only if filesystem hasn't already changed it */
-               if (!lockdep_match_class(&inode->i_mutex,
-                   &type->i_mutex_key)) {
+               if (lockdep_match_class(&inode->i_mutex, &type->i_mutex_key)) {
                        /*
                         * ensure nobody is actually holding i_mutex
                         */
@@ -966,6 +965,7 @@ void unlock_new_inode(struct inode *inode)
        spin_lock(&inode->i_lock);
        WARN_ON(!(inode->i_state & I_NEW));
        inode->i_state &= ~I_NEW;
+       smp_mb();
        wake_up_bit(&inode->i_state, __I_NEW);
        spin_unlock(&inode->i_lock);
 }
index e2ba628..fa96a26 100644 (file)
@@ -1374,6 +1374,126 @@ static inline int can_lookup(struct inode *inode)
        return 1;
 }
 
+/*
+ * We can do the critical dentry name comparison and hashing
+ * operations one word at a time, but we are limited to:
+ *
+ * - Architectures with fast unaligned word accesses. We could
+ *   do a "get_unaligned()" if this helps and is sufficiently
+ *   fast.
+ *
+ * - Little-endian machines (so that we can generate the mask
+ *   of low bytes efficiently). Again, we *could* do a byte
+ *   swapping load on big-endian architectures if that is not
+ *   expensive enough to make the optimization worthless.
+ *
+ * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
+ *   do not trap on the (extremely unlikely) case of a page
+ *   crossing operation).
+ *
+ * - Furthermore, we need an efficient 64-bit compile for the
+ *   64-bit case in order to generate the "number of bytes in
+ *   the final mask". Again, that could be replaced with an
+ *   efficient population count instruction or similar.
+ */
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+#ifdef CONFIG_64BIT
+
+/*
+ * Jan Achrenius on G+: microoptimized version of
+ * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
+ * that works for the bytemasks without having to
+ * mask them first.
+ */
+static inline long count_masked_bytes(unsigned long mask)
+{
+       return mask*0x0001020304050608 >> 56;
+}
+
+static inline unsigned int fold_hash(unsigned long hash)
+{
+       hash += hash >> (8*sizeof(int));
+       return hash;
+}
+
+#else  /* 32-bit case */
+
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
+static inline long count_masked_bytes(long mask)
+{
+       /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+       long a = (0x0ff0001+mask) >> 23;
+       /* Fix the 1 for 00 case */
+       return a & mask;
+}
+
+#define fold_hash(x) (x)
+
+#endif
+
+unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+{
+       unsigned long a, mask;
+       unsigned long hash = 0;
+
+       for (;;) {
+               a = *(unsigned long *)name;
+               hash *= 9;
+               if (len < sizeof(unsigned long))
+                       break;
+               hash += a;
+               name += sizeof(unsigned long);
+               len -= sizeof(unsigned long);
+               if (!len)
+                       goto done;
+       }
+       mask = ~(~0ul << len*8);
+       hash += mask & a;
+done:
+       return fold_hash(hash);
+}
+EXPORT_SYMBOL(full_name_hash);
+
+#define ONEBYTES       0x0101010101010101ul
+#define SLASHBYTES     0x2f2f2f2f2f2f2f2ful
+#define HIGHBITS       0x8080808080808080ul
+
+/* Return the high bit set in the first byte that is a zero */
+static inline unsigned long has_zero(unsigned long a)
+{
+       return ((a - ONEBYTES) & ~a) & HIGHBITS;
+}
+
+/*
+ * Calculate the length and hash of the path component, and
+ * return the length of the component.
+ */
+static inline unsigned long hash_name(const char *name, unsigned int *hashp)
+{
+       unsigned long a, mask, hash, len;
+
+       hash = a = 0;
+       len = -sizeof(unsigned long);
+       do {
+               hash = (hash + a) * 9;
+               len += sizeof(unsigned long);
+               a = *(unsigned long *)(name+len);
+               /* Do we have any NUL or '/' bytes in this word? */
+               mask = has_zero(a) | has_zero(a ^ SLASHBYTES);
+       } while (!mask);
+
+       /* The mask *below* the first high bit set */
+       mask = (mask - 1) & ~mask;
+       mask >>= 7;
+       hash += a & mask;
+       *hashp = fold_hash(hash);
+
+       return len + count_masked_bytes(mask);
+}
+
+#else
+
 unsigned int full_name_hash(const unsigned char *name, unsigned int len)
 {
        unsigned long hash = init_name_hash();
@@ -1402,6 +1522,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
        return len;
 }
 
+#endif
+
 /*
  * Name resolution.
  * This is the basic name resolution function, turning a pathname into
@@ -2162,7 +2284,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
                /* sayonara */
                error = complete_walk(nd);
                if (error)
-                       return ERR_PTR(-ECHILD);
+                       return ERR_PTR(error);
 
                error = -ENOTDIR;
                if (nd->flags & LOOKUP_DIRECTORY) {
@@ -2261,7 +2383,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
        /* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
        error = complete_walk(nd);
        if (error)
-               goto exit;
+               return ERR_PTR(error);
        error = -EISDIR;
        if (S_ISDIR(nd->inode->i_mode))
                goto exit;
index d327140..501b7f8 100644 (file)
@@ -409,6 +409,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
        nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
        nilfs->ns_r_segments_percentage =
                le32_to_cpu(sbp->s_r_segments_percentage);
+       if (nilfs->ns_r_segments_percentage < 1 ||
+           nilfs->ns_r_segments_percentage > 99) {
+               printk(KERN_ERR "NILFS: invalid reserved segments percentage.\n");
+               return -EINVAL;
+       }
+
        nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
        nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
        return 0;
@@ -515,6 +521,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
                brelse(sbh[1]);
                sbh[1] = NULL;
                sbp[1] = NULL;
+               valid[1] = 0;
                swp = 0;
        }
        if (!valid[swp]) {
index d4548dd..965d4bd 100644 (file)
@@ -1310,8 +1310,7 @@ sched_autogroup_write(struct file *file, const char __user *buf,
        if (!p)
                return -ESRCH;
 
-       err = nice;
-       err = proc_sched_autogroup_set_nice(p, &err);
+       err = proc_sched_autogroup_set_nice(p, nice);
        if (err)
                count = err;
 
index dca0c38..d567b84 100644 (file)
@@ -201,12 +201,10 @@ out:
 static int udf_release_file(struct inode *inode, struct file *filp)
 {
        if (filp->f_mode & FMODE_WRITE) {
-               mutex_lock(&inode->i_mutex);
                down_write(&UDF_I(inode)->i_data_sem);
                udf_discard_prealloc(inode);
                udf_truncate_tail_extent(inode);
                up_write(&UDF_I(inode)->i_data_sem);
-               mutex_unlock(&inode->i_mutex);
        }
        return 0;
 }
index 514ed45..d117b29 100644 (file)
@@ -23,6 +23,8 @@
 #ifndef ASM_ARM_HARDWARE_SERIAL_AMBA_H
 #define ASM_ARM_HARDWARE_SERIAL_AMBA_H
 
+#include <linux/types.h>
+
 /* -------------------------------------------------------------------------------
  *  From AMBA UART (PL010) Block Specification
  * -------------------------------------------------------------------------------
index 081147d..fbe89e1 100644 (file)
@@ -319,13 +319,6 @@ static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
        __clocksource_updatefreq_scale(cs, 1000, khz);
 }
 
-static inline void
-clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec)
-{
-       return clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
-                                     NSEC_PER_SEC, minsec);
-}
-
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
 extern void
 update_vsyscall(struct timespec *ts, struct timespec *wtm,
index 4270bed..ff5f525 100644 (file)
@@ -47,26 +47,6 @@ struct dentry_stat_t {
 };
 extern struct dentry_stat_t dentry_stat;
 
-/*
- * Compare 2 name strings, return 0 if they match, otherwise non-zero.
- * The strings are both count bytes long, and count is non-zero.
- */
-static inline int dentry_cmp(const unsigned char *cs, size_t scount,
-                               const unsigned char *ct, size_t tcount)
-{
-       if (scount != tcount)
-               return 1;
-
-       do {
-               if (*cs != *ct)
-                       return 1;
-               cs++;
-               ct++;
-               tcount--;
-       } while (tcount);
-       return 0;
-}
-
 /* Name hashing routines. Initial hash value */
 /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
 #define init_name_hash()               0
index 028e26f..72a6cab 100644 (file)
@@ -31,16 +31,33 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
 
+/*
+ * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are
+ * set in the flags member.
+ *
+ * ENABLED - set/unset when ftrace_ops is registered/unregistered
+ * GLOBAL  - set manually by ftrace_ops user to denote the ftrace_ops
+ *           is part of the global tracers sharing the same filter
+ *           via set_ftrace_* debugfs files.
+ * DYNAMIC - set when ftrace_ops is registered to denote dynamically
+ *           allocated ftrace_ops which need special care
+ * CONTROL - set manually by ftrace_ops user to denote the ftrace_ops
+ *           could be controlled by the following calls:
+ *             ftrace_function_local_enable
+ *             ftrace_function_local_disable
+ */
 enum {
        FTRACE_OPS_FL_ENABLED           = 1 << 0,
        FTRACE_OPS_FL_GLOBAL            = 1 << 1,
        FTRACE_OPS_FL_DYNAMIC           = 1 << 2,
+       FTRACE_OPS_FL_CONTROL           = 1 << 3,
 };
 
 struct ftrace_ops {
        ftrace_func_t                   func;
        struct ftrace_ops               *next;
        unsigned long                   flags;
+       int __percpu                    *disabled;
 #ifdef CONFIG_DYNAMIC_FTRACE
        struct ftrace_hash              *notrace_hash;
        struct ftrace_hash              *filter_hash;
@@ -97,6 +114,55 @@ int register_ftrace_function(struct ftrace_ops *ops);
 int unregister_ftrace_function(struct ftrace_ops *ops);
 void clear_ftrace_function(void);
 
+/**
+ * ftrace_function_local_enable - enable controlled ftrace_ops on current cpu
+ *
+ * This function enables tracing on current cpu by decreasing
+ * the per cpu control variable.
+ * It must be called with preemption disabled and only on ftrace_ops
+ * registered with FTRACE_OPS_FL_CONTROL. If called without preemption
+ * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
+ */
+static inline void ftrace_function_local_enable(struct ftrace_ops *ops)
+{
+       if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)))
+               return;
+
+       (*this_cpu_ptr(ops->disabled))--;
+}
+
+/**
+ * ftrace_function_local_disable - disable controlled ftrace_ops on current cpu
+ *
+ * This function disables tracing on current cpu by increasing
+ * the per cpu control variable.
+ * It must be called with preemption disabled and only on ftrace_ops
+ * registered with FTRACE_OPS_FL_CONTROL. If called without preemption
+ * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
+ */
+static inline void ftrace_function_local_disable(struct ftrace_ops *ops)
+{
+       if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)))
+               return;
+
+       (*this_cpu_ptr(ops->disabled))++;
+}
+
+/**
+ * ftrace_function_local_disabled - returns ftrace_ops disabled value
+ *                                  on current cpu
+ *
+ * This function returns value of ftrace_ops::disabled on current cpu.
+ * It must be called with preemption disabled and only on ftrace_ops
+ * registered with FTRACE_OPS_FL_CONTROL. If called without preemption
+ * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
+ */
+static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
+{
+       WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL));
+       return *this_cpu_ptr(ops->disabled);
+}
+
 extern void ftrace_stub(unsigned long a0, unsigned long a1);
 
 #else /* !CONFIG_FUNCTION_TRACER */
@@ -178,12 +244,13 @@ struct dyn_ftrace {
 };
 
 int ftrace_force_update(void);
-void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
+int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
                       int len, int reset);
-void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
+int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
                        int len, int reset);
 void ftrace_set_global_filter(unsigned char *buf, int len, int reset);
 void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
+void ftrace_free_filter(struct ftrace_ops *ops);
 
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
@@ -314,9 +381,6 @@ extern void ftrace_enable_daemon(void);
 #else
 static inline int skip_trace(unsigned long ip) { return 0; }
 static inline int ftrace_force_update(void) { return 0; }
-static inline void ftrace_set_filter(unsigned char *buf, int len, int reset)
-{
-}
 static inline void ftrace_disable_daemon(void) { }
 static inline void ftrace_enable_daemon(void) { }
 static inline void ftrace_release_mod(struct module *mod) {}
@@ -340,6 +404,9 @@ static inline int ftrace_text_reserved(void *start, void *end)
  */
 #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
 #define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
+#define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; })
+#define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; })
+#define ftrace_free_filter(ops) do { } while (0)
 
 static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
                            size_t cnt, loff_t *ppos) { return -ENODEV; }
index c3da42d..dd478fc 100644 (file)
@@ -146,6 +146,10 @@ enum trace_reg {
        TRACE_REG_UNREGISTER,
        TRACE_REG_PERF_REGISTER,
        TRACE_REG_PERF_UNREGISTER,
+       TRACE_REG_PERF_OPEN,
+       TRACE_REG_PERF_CLOSE,
+       TRACE_REG_PERF_ADD,
+       TRACE_REG_PERF_DEL,
 };
 
 struct ftrace_event_call;
@@ -157,7 +161,7 @@ struct ftrace_event_class {
        void                    *perf_probe;
 #endif
        int                     (*reg)(struct ftrace_event_call *event,
-                                      enum trace_reg type);
+                                      enum trace_reg type, void *data);
        int                     (*define_fields)(struct ftrace_event_call *);
        struct list_head        *(*get_fields)(struct ftrace_event_call *);
        struct list_head        fields;
@@ -165,7 +169,7 @@ struct ftrace_event_class {
 };
 
 extern int ftrace_event_reg(struct ftrace_event_call *event,
-                           enum trace_reg type);
+                           enum trace_reg type, void *data);
 
 enum {
        TRACE_EVENT_FL_ENABLED_BIT,
@@ -241,6 +245,7 @@ enum {
        FILTER_STATIC_STRING,
        FILTER_DYN_STRING,
        FILTER_PTR_STRING,
+       FILTER_TRACE_FN,
 };
 
 #define EVENT_STORAGE_SIZE 128
index fe23ee7..e61d319 100644 (file)
@@ -596,6 +596,7 @@ extern char *disk_name (struct gendisk *hd, int partno, char *buf);
 
 extern int disk_expand_part_tbl(struct gendisk *disk, int target);
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
+extern int invalidate_partitions(struct gendisk *disk, struct block_device *bdev);
 extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
                                                     int partno, sector_t start,
                                                     sector_t len, int flags,
index 9c66b1a..f994d51 100644 (file)
@@ -149,7 +149,7 @@ extern struct cred init_cred;
        },                                                              \
        .rt             = {                                             \
                .run_list       = LIST_HEAD_INIT(tsk.rt.run_list),      \
-               .time_slice     = HZ,                                   \
+               .time_slice     = RR_TIMESLICE,                         \
                .nr_cpus_allowed = NR_CPUS,                             \
        },                                                              \
        .tasks          = LIST_HEAD_INIT(tsk.tasks),                    \
index a64b00e..3f830e0 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/atomic.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
-#include <trace/events/irq.h>
 
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
@@ -456,11 +455,7 @@ asmlinkage void do_softirq(void);
 asmlinkage void __do_softirq(void);
 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void softirq_init(void);
-static inline void __raise_softirq_irqoff(unsigned int nr)
-{
-       trace_softirq_raise(nr);
-       or_softirq_pending(1UL << nr);
-}
+extern void __raise_softirq_irqoff(unsigned int nr);
 
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
index 119773e..1a30180 100644 (file)
@@ -6,8 +6,11 @@
 #include <linux/workqueue.h>
 
 enum {
-       ICQ_IOPRIO_CHANGED,
-       ICQ_CGROUP_CHANGED,
+       ICQ_IOPRIO_CHANGED      = 1 << 0,
+       ICQ_CGROUP_CHANGED      = 1 << 1,
+       ICQ_EXITED              = 1 << 2,
+
+       ICQ_CHANGED_MASK        = ICQ_IOPRIO_CHANGED | ICQ_CGROUP_CHANGED,
 };
 
 /*
@@ -88,7 +91,7 @@ struct io_cq {
                struct rcu_head         __rcu_head;
        };
 
-       unsigned long           changed;
+       unsigned int            flags;
 };
 
 /*
@@ -139,6 +142,7 @@ struct io_context *get_task_io_context(struct task_struct *task,
                                       gfp_t gfp_flags, int node);
 void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
 void ioc_cgroup_changed(struct io_context *ioc);
+unsigned int icq_get_changed(struct io_cq *icq);
 #else
 struct io_context;
 static inline void put_io_context(struct io_context *ioc) { }
index 5ce8b14..c513a40 100644 (file)
@@ -1,22 +1,69 @@
 #ifndef _LINUX_JUMP_LABEL_H
 #define _LINUX_JUMP_LABEL_H
 
+/*
+ * Jump label support
+ *
+ * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com>
+ * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Jump labels provide an interface to generate dynamic branches using
+ * self-modifying code. Assuming toolchain and architecture support the result
+ * of an "if (static_key_false(&key))" statement is an unconditional branch (which
+ * defaults to false - and the true block is placed out of line).
+ *
+ * However at runtime we can change the branch target using
+ * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key
+ * object and for as long as there are references all branches referring to
+ * that particular key will point to the (out of line) true block.
+ *
+ * Since this relies on modifying code the static_key_slow_{inc,dec}() functions
+ * must be considered absolute slow paths (machine wide synchronization etc.).
+ * OTOH, since the affected branches are unconditional their runtime overhead
+ * will be absolutely minimal, esp. in the default (off) case where the total
+ * effect is a single NOP of appropriate size. The on case will patch in a jump
+ * to the out-of-line block.
+ *
+ * When the control is directly exposed to userspace it is prudent to delay the
+ * decrement to avoid high frequency code modifications which can (and do)
+ * cause significant performance degradation. Struct static_key_deferred and
+ * static_key_slow_dec_deferred() provide for this.
+ *
+ * Lacking toolchain and/or architecture support, it falls back to a simple
+ * conditional branch.
+ *
+ * struct static_key my_key = STATIC_KEY_INIT_TRUE;
+ *
+ *   if (static_key_true(&my_key)) {
+ *   }
+ *
+ * will result in the true case being in-line and starts the key with a single
+ * reference. Mixing static_key_true() and static_key_false() on the same key is not
+ * allowed.
+ *
+ * Not initializing the key (static data is initialized to 0s anyway) is the
+ * same as using STATIC_KEY_INIT_FALSE and static_key_false() is
+ * equivalent with static_branch().
+ *
+*/
+
 #include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/workqueue.h>
 
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
 
-struct jump_label_key {
+struct static_key {
        atomic_t enabled;
+/* Set lsb bit to 1 if branch is default true, 0 otherwise */
        struct jump_entry *entries;
 #ifdef CONFIG_MODULES
-       struct jump_label_mod *next;
+       struct static_key_mod *next;
 #endif
 };
 
-struct jump_label_key_deferred {
-       struct jump_label_key key;
+struct static_key_deferred {
+       struct static_key key;
        unsigned long timeout;
        struct delayed_work work;
 };
@@ -34,13 +81,34 @@ struct module;
 
 #ifdef HAVE_JUMP_LABEL
 
-#ifdef CONFIG_MODULES
-#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL, NULL}
-#else
-#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL}
-#endif
+#define JUMP_LABEL_TRUE_BRANCH 1UL
+
+static
+inline struct jump_entry *jump_label_get_entries(struct static_key *key)
+{
+       return (struct jump_entry *)((unsigned long)key->entries
+                                               & ~JUMP_LABEL_TRUE_BRANCH);
+}
 
-static __always_inline bool static_branch(struct jump_label_key *key)
+static inline bool jump_label_get_branch_default(struct static_key *key)
+{
+       if ((unsigned long)key->entries & JUMP_LABEL_TRUE_BRANCH)
+               return true;
+       return false;
+}
+
+static __always_inline bool static_key_false(struct static_key *key)
+{
+       return arch_static_branch(key);
+}
+
+static __always_inline bool static_key_true(struct static_key *key)
+{
+       return !static_key_false(key);
+}
+
+/* Deprecated. Please use static_key_false() instead. */
+static __always_inline bool static_branch(struct static_key *key)
 {
        return arch_static_branch(key);
 }
@@ -56,21 +124,23 @@ extern void arch_jump_label_transform(struct jump_entry *entry,
 extern void arch_jump_label_transform_static(struct jump_entry *entry,
                                             enum jump_label_type type);
 extern int jump_label_text_reserved(void *start, void *end);
-extern void jump_label_inc(struct jump_label_key *key);
-extern void jump_label_dec(struct jump_label_key *key);
-extern void jump_label_dec_deferred(struct jump_label_key_deferred *key);
-extern bool jump_label_enabled(struct jump_label_key *key);
+extern void static_key_slow_inc(struct static_key *key);
+extern void static_key_slow_dec(struct static_key *key);
+extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
 extern void jump_label_apply_nops(struct module *mod);
-extern void jump_label_rate_limit(struct jump_label_key_deferred *key,
-               unsigned long rl);
+extern void
+jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
+
+#define STATIC_KEY_INIT_TRUE ((struct static_key) \
+       { .enabled = ATOMIC_INIT(1), .entries = (void *)1 })
+#define STATIC_KEY_INIT_FALSE ((struct static_key) \
+       { .enabled = ATOMIC_INIT(0), .entries = (void *)0 })
 
 #else  /* !HAVE_JUMP_LABEL */
 
 #include <linux/atomic.h>
 
-#define JUMP_LABEL_INIT {ATOMIC_INIT(0)}
-
-struct jump_label_key {
+struct static_key {
        atomic_t enabled;
 };
 
@@ -78,30 +148,45 @@ static __always_inline void jump_label_init(void)
 {
 }
 
-struct jump_label_key_deferred {
-       struct jump_label_key  key;
+struct static_key_deferred {
+       struct static_key  key;
 };
 
-static __always_inline bool static_branch(struct jump_label_key *key)
+static __always_inline bool static_key_false(struct static_key *key)
+{
+       if (unlikely(atomic_read(&key->enabled) > 0))
+               return true;
+       return false;
+}
+
+static __always_inline bool static_key_true(struct static_key *key)
 {
-       if (unlikely(atomic_read(&key->enabled)))
+       if (likely(atomic_read(&key->enabled) > 0))
                return true;
        return false;
 }
 
-static inline void jump_label_inc(struct jump_label_key *key)
+/* Deprecated. Please use static_key_false() instead. */
+static __always_inline bool static_branch(struct static_key *key)
+{
+       if (unlikely(atomic_read(&key->enabled) > 0))
+               return true;
+       return false;
+}
+
+static inline void static_key_slow_inc(struct static_key *key)
 {
        atomic_inc(&key->enabled);
 }
 
-static inline void jump_label_dec(struct jump_label_key *key)
+static inline void static_key_slow_dec(struct static_key *key)
 {
        atomic_dec(&key->enabled);
 }
 
-static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key)
+static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
 {
-       jump_label_dec(&key->key);
+       static_key_slow_dec(&key->key);
 }
 
 static inline int jump_label_text_reserved(void *start, void *end)
@@ -112,23 +197,30 @@ static inline int jump_label_text_reserved(void *start, void *end)
 static inline void jump_label_lock(void) {}
 static inline void jump_label_unlock(void) {}
 
-static inline bool jump_label_enabled(struct jump_label_key *key)
-{
-       return !!atomic_read(&key->enabled);
-}
-
 static inline int jump_label_apply_nops(struct module *mod)
 {
        return 0;
 }
 
-static inline void jump_label_rate_limit(struct jump_label_key_deferred *key,
+static inline void
+jump_label_rate_limit(struct static_key_deferred *key,
                unsigned long rl)
 {
 }
+
+#define STATIC_KEY_INIT_TRUE ((struct static_key) \
+               { .enabled = ATOMIC_INIT(1) })
+#define STATIC_KEY_INIT_FALSE ((struct static_key) \
+               { .enabled = ATOMIC_INIT(0) })
+
 #endif /* HAVE_JUMP_LABEL */
 
-#define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), })
-#define jump_label_key_disabled        ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), })
+#define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE
+#define jump_label_enabled static_key_enabled
+
+static inline bool static_key_enabled(struct static_key *key)
+{
+       return (atomic_read(&key->enabled) > 0);
+}
 
 #endif /* _LINUX_JUMP_LABEL_H */
index e834342..d801acb 100644 (file)
 }                                                      \
 )
 
+/*
+ * Multiplies an integer by a fraction, while avoiding unnecessary
+ * overflow or loss of precision.
+ */
+#define mult_frac(x, numer, denom)(                    \
+{                                                      \
+       typeof(x) quot = (x) / (denom);                 \
+       typeof(x) rem  = (x) % (denom);                 \
+       (quot * (numer)) + ((rem * (numer)) / (denom)); \
+}                                                      \
+)
+
+
 #define _RET_IP_               (unsigned long)__builtin_return_address(0)
 #define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
 
index fee6631..35f7237 100644 (file)
 #include <linux/errno.h>
 #include <linux/list.h>
 
+/*
+ * Keep this list arranged in rough order of priority. Anything listed after
+ * KMSG_DUMP_OOPS will not be logged by default unless printk.always_kmsg_dump
+ * is passed to the kernel.
+ */
 enum kmsg_dump_reason {
-       KMSG_DUMP_OOPS,
        KMSG_DUMP_PANIC,
+       KMSG_DUMP_OOPS,
+       KMSG_DUMP_EMERG,
        KMSG_DUMP_RESTART,
        KMSG_DUMP_HALT,
        KMSG_DUMP_POWEROFF,
-       KMSG_DUMP_EMERG,
 };
 
 /**
index 23fcdfc..b8ba855 100644 (file)
@@ -6,6 +6,8 @@
 
 #if BITS_PER_LONG == 64
 
+#define div64_long(x,y) div64_s64((x),(y))
+
 /**
  * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder
  *
@@ -45,6 +47,8 @@ static inline s64 div64_s64(s64 dividend, s64 divisor)
 
 #elif BITS_PER_LONG == 32
 
+#define div64_long(x,y) div_s64((x),(y))
+
 #ifndef div_u64_rem
 static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
 {
index 4d34356..b80de52 100644 (file)
@@ -129,7 +129,6 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
                                        struct page *newpage);
 
-extern void mem_cgroup_reset_owner(struct page *page);
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
 #endif
@@ -392,10 +391,6 @@ static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
                                struct page *newpage)
 {
 }
-
-static inline void mem_cgroup_reset_owner(struct page *page)
-{
-}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
index f486f63..3e5cb25 100644 (file)
@@ -214,8 +214,8 @@ enum {
 #include <linux/skbuff.h>
 
 #ifdef CONFIG_RPS
-#include <linux/jump_label.h>
-extern struct jump_label_key rps_needed;
+#include <linux/static_key.h>
+extern struct static_key rps_needed;
 #endif
 
 struct neighbour;
index b809265..29734be 100644 (file)
@@ -163,13 +163,13 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[];
 extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 
 #if defined(CONFIG_JUMP_LABEL)
-#include <linux/jump_label.h>
-extern struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+#include <linux/static_key.h>
+extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
 {
        if (__builtin_constant_p(pf) &&
            __builtin_constant_p(hook))
-               return static_branch(&nf_hooks_needed[pf][hook]);
+               return static_key_false(&nf_hooks_needed[pf][hook]);
 
        return !list_empty(&nf_hooks[pf][hook]);
 }
index a75a831..92cf6ad 100644 (file)
@@ -281,6 +281,14 @@ static inline struct property *of_find_property(const struct device_node *np,
        return NULL;
 }
 
+static inline struct device_node *of_find_compatible_node(
+                                               struct device_node *from,
+                                               const char *type,
+                                               const char *compat)
+{
+       return NULL;
+}
+
 static inline int of_property_read_u32_array(const struct device_node *np,
                                             const char *propname,
                                             u32 *out_values, size_t sz)
index 32cd1f6..21638ae 100644 (file)
@@ -348,9 +348,9 @@ do {                                                                        \
 #define _this_cpu_generic_to_op(pcp, val, op)                          \
 do {                                                                   \
        unsigned long flags;                                            \
-       local_irq_save(flags);                                          \
+       raw_local_irq_save(flags);                                      \
        *__this_cpu_ptr(&(pcp)) op val;                                 \
-       local_irq_restore(flags);                                       \
+       raw_local_irq_restore(flags);                                   \
 } while (0)
 
 #ifndef this_cpu_write
@@ -449,10 +449,10 @@ do {                                                                      \
 ({                                                                     \
        typeof(pcp) ret__;                                              \
        unsigned long flags;                                            \
-       local_irq_save(flags);                                          \
+       raw_local_irq_save(flags);                                      \
        __this_cpu_add(pcp, val);                                       \
        ret__ = __this_cpu_read(pcp);                                   \
-       local_irq_restore(flags);                                       \
+       raw_local_irq_restore(flags);                                   \
        ret__;                                                          \
 })
 
@@ -479,10 +479,10 @@ do {                                                                      \
 #define _this_cpu_generic_xchg(pcp, nval)                              \
 ({     typeof(pcp) ret__;                                              \
        unsigned long flags;                                            \
-       local_irq_save(flags);                                          \
+       raw_local_irq_save(flags);                                      \
        ret__ = __this_cpu_read(pcp);                                   \
        __this_cpu_write(pcp, nval);                                    \
-       local_irq_restore(flags);                                       \
+       raw_local_irq_restore(flags);                                   \
        ret__;                                                          \
 })
 
@@ -507,11 +507,11 @@ do {                                                                      \
 ({                                                                     \
        typeof(pcp) ret__;                                              \
        unsigned long flags;                                            \
-       local_irq_save(flags);                                          \
+       raw_local_irq_save(flags);                                      \
        ret__ = __this_cpu_read(pcp);                                   \
        if (ret__ == (oval))                                            \
                __this_cpu_write(pcp, nval);                            \
-       local_irq_restore(flags);                                       \
+       raw_local_irq_restore(flags);                                   \
        ret__;                                                          \
 })
 
@@ -544,10 +544,10 @@ do {                                                                      \
 ({                                                                     \
        int ret__;                                                      \
        unsigned long flags;                                            \
-       local_irq_save(flags);                                          \
+       raw_local_irq_save(flags);                                      \
        ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2,           \
                        oval1, oval2, nval1, nval2);                    \
-       local_irq_restore(flags);                                       \
+       raw_local_irq_restore(flags);                                   \
        ret__;                                                          \
 })
 
@@ -718,12 +718,13 @@ do {                                                                      \
 # ifndef __this_cpu_add_return_8
 #  define __this_cpu_add_return_8(pcp, val)    __this_cpu_generic_add_return(pcp, val)
 # endif
-# define __this_cpu_add_return(pcp, val)       __pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
+# define __this_cpu_add_return(pcp, val)       \
+       __pcpu_size_call_return2(__this_cpu_add_return_, pcp, val)
 #endif
 
-#define __this_cpu_sub_return(pcp, val)        this_cpu_add_return(pcp, -(val))
-#define __this_cpu_inc_return(pcp)     this_cpu_add_return(pcp, 1)
-#define __this_cpu_dec_return(pcp)     this_cpu_add_return(pcp, -1)
+#define __this_cpu_sub_return(pcp, val)        __this_cpu_add_return(pcp, -(val))
+#define __this_cpu_inc_return(pcp)     __this_cpu_add_return(pcp, 1)
+#define __this_cpu_dec_return(pcp)     __this_cpu_add_return(pcp, -1)
 
 #define __this_cpu_generic_xchg(pcp, nval)                             \
 ({     typeof(pcp) ret__;                                              \
index abb2776..bd9f55a 100644 (file)
@@ -129,11 +129,40 @@ enum perf_event_sample_format {
        PERF_SAMPLE_PERIOD                      = 1U << 8,
        PERF_SAMPLE_STREAM_ID                   = 1U << 9,
        PERF_SAMPLE_RAW                         = 1U << 10,
+       PERF_SAMPLE_BRANCH_STACK                = 1U << 11,
 
-       PERF_SAMPLE_MAX = 1U << 11,             /* non-ABI */
+       PERF_SAMPLE_MAX = 1U << 12,             /* non-ABI */
 };
 
 /*
+ * values to program into branch_sample_type when PERF_SAMPLE_BRANCH_STACK is set
+ *
+ * If the user does not pass priv level information via branch_sample_type,
+ * the kernel uses the event's priv level. Branch and event priv levels do
+ * not have to match. Branch priv level is checked for permissions.
+ *
+ * The branch types can be combined, however BRANCH_ANY covers all types
+ * of branches and therefore it supersedes all the other types.
+ */
+enum perf_branch_sample_type {
+       PERF_SAMPLE_BRANCH_USER         = 1U << 0, /* user branches */
+       PERF_SAMPLE_BRANCH_KERNEL       = 1U << 1, /* kernel branches */
+       PERF_SAMPLE_BRANCH_HV           = 1U << 2, /* hypervisor branches */
+
+       PERF_SAMPLE_BRANCH_ANY          = 1U << 3, /* any branch types */
+       PERF_SAMPLE_BRANCH_ANY_CALL     = 1U << 4, /* any call branch */
+       PERF_SAMPLE_BRANCH_ANY_RETURN   = 1U << 5, /* any return branch */
+       PERF_SAMPLE_BRANCH_IND_CALL     = 1U << 6, /* indirect calls */
+
+       PERF_SAMPLE_BRANCH_MAX          = 1U << 7, /* non-ABI */
+};
+
+#define PERF_SAMPLE_BRANCH_PLM_ALL \
+       (PERF_SAMPLE_BRANCH_USER|\
+        PERF_SAMPLE_BRANCH_KERNEL|\
+        PERF_SAMPLE_BRANCH_HV)
+
+/*
  * The format of the data returned by read() on a perf event fd,
  * as specified by attr.read_format:
  *
@@ -163,6 +192,8 @@ enum perf_event_read_format {
 };
 
 #define PERF_ATTR_SIZE_VER0    64      /* sizeof first published struct */
+#define PERF_ATTR_SIZE_VER1    72      /* add: config2 */
+#define PERF_ATTR_SIZE_VER2    80      /* add: branch_sample_type */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -240,6 +271,7 @@ struct perf_event_attr {
                __u64           bp_len;
                __u64           config2; /* extension of config1 */
        };
+       __u64   branch_sample_type; /* enum perf_branch_sample_type */
 };
 
 /*
@@ -291,12 +323,14 @@ struct perf_event_mmap_page {
        __s64   offset;                 /* add to hardware event value */
        __u64   time_enabled;           /* time event active */
        __u64   time_running;           /* time event on cpu */
+       __u32   time_mult, time_shift;
+       __u64   time_offset;
 
                /*
                 * Hole for extension of the self monitor capabilities
                 */
 
-       __u64   __reserved[123];        /* align to 1k */
+       __u64   __reserved[121];        /* align to 1k */
 
        /*
         * Control data for the mmap() data buffer.
@@ -456,6 +490,8 @@ enum perf_event_type {
         *
         *      { u32                   size;
         *        char                  data[size];}&& PERF_SAMPLE_RAW
+        *
+        *      { u64 nr; { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
         * };
         */
        PERF_RECORD_SAMPLE                      = 9,
@@ -512,7 +548,7 @@ struct perf_guest_info_callbacks {
 #include <linux/ftrace.h>
 #include <linux/cpu.h>
 #include <linux/irq_work.h>
-#include <linux/jump_label.h>
+#include <linux/static_key.h>
 #include <linux/atomic.h>
 #include <asm/local.h>
 
@@ -528,12 +564,34 @@ struct perf_raw_record {
        void                            *data;
 };
 
+/*
+ * single taken branch record layout:
+ *
+ *      from: source instruction (may not always be a branch insn)
+ *        to: branch target
+ *   mispred: branch target was mispredicted
+ * predicted: branch target was predicted
+ *
+ * support for mispred, predicted is optional. In case it
+ * is not supported mispred = predicted = 0.
+ */
 struct perf_branch_entry {
-       __u64                           from;
-       __u64                           to;
-       __u64                           flags;
+       __u64   from;
+       __u64   to;
+       __u64   mispred:1,  /* target mispredicted */
+               predicted:1,/* target predicted */
+               reserved:62;
 };
 
+/*
+ * branch stack layout:
+ *  nr: number of taken branches stored in entries[]
+ *
+ * Note that nr can vary from sample to sample.
+ * Branches (to, from) are stored from most recent
+ * to least recent, i.e., entries[0] contains the most
+ * recent branch.
+ */
 struct perf_branch_stack {
        __u64                           nr;
        struct perf_branch_entry        entries[0];
@@ -564,7 +622,9 @@ struct hw_perf_event {
                        unsigned long   event_base;
                        int             idx;
                        int             last_cpu;
+
                        struct hw_perf_event_extra extra_reg;
+                       struct hw_perf_event_extra branch_reg;
                };
                struct { /* software */
                        struct hrtimer  hrtimer;
@@ -616,6 +676,7 @@ struct pmu {
        struct list_head                entry;
 
        struct device                   *dev;
+       const struct attribute_group    **attr_groups;
        char                            *name;
        int                             type;
 
@@ -681,6 +742,17 @@ struct pmu {
         * for each successful ->add() during the transaction.
         */
        void (*cancel_txn)              (struct pmu *pmu); /* optional */
+
+       /*
+        * Will return the value for perf_event_mmap_page::index for this event,
+        * if no implementation is provided it will default to: event->hw.idx + 1.
+        */
+       int (*event_idx)                (struct perf_event *event); /* optional */
+
+       /*
+        * flush branch stack on context-switches (needed in cpu-wide mode)
+        */
+       void (*flush_branch_stack)      (void);
 };
 
 /**
@@ -850,6 +922,9 @@ struct perf_event {
 #ifdef CONFIG_EVENT_TRACING
        struct ftrace_event_call        *tp_event;
        struct event_filter             *filter;
+#ifdef CONFIG_FUNCTION_TRACER
+       struct ftrace_ops               ftrace_ops;
+#endif
 #endif
 
 #ifdef CONFIG_CGROUP_PERF
@@ -911,7 +986,8 @@ struct perf_event_context {
        u64                             parent_gen;
        u64                             generation;
        int                             pin_count;
-       int                             nr_cgroups; /* cgroup events present */
+       int                             nr_cgroups;      /* cgroup evts */
+       int                             nr_branch_stack; /* branch_stack evt */
        struct rcu_head                 rcu_head;
 };
 
@@ -976,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
 extern u64 perf_event_read_value(struct perf_event *event,
                                 u64 *enabled, u64 *running);
 
+
 struct perf_sample_data {
        u64                             type;
 
@@ -995,12 +1072,14 @@ struct perf_sample_data {
        u64                             period;
        struct perf_callchain_entry     *callchain;
        struct perf_raw_record          *raw;
+       struct perf_branch_stack        *br_stack;
 };
 
 static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
 {
        data->addr = addr;
        data->raw  = NULL;
+       data->br_stack = NULL;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
@@ -1029,7 +1108,7 @@ static inline int is_software_event(struct perf_event *event)
        return event->pmu->task_ctx_nr == perf_sw_context;
 }
 
-extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
+extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
 
@@ -1057,7 +1136,7 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 {
        struct pt_regs hot_regs;
 
-       if (static_branch(&perf_swevent_enabled[event_id])) {
+       if (static_key_false(&perf_swevent_enabled[event_id])) {
                if (!regs) {
                        perf_fetch_caller_regs(&hot_regs);
                        regs = &hot_regs;
@@ -1066,12 +1145,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
        }
 }
 
-extern struct jump_label_key_deferred perf_sched_events;
+extern struct static_key_deferred perf_sched_events;
 
 static inline void perf_event_task_sched_in(struct task_struct *prev,
                                            struct task_struct *task)
 {
-       if (static_branch(&perf_sched_events.key))
+       if (static_key_false(&perf_sched_events.key))
                __perf_event_task_sched_in(prev, task);
 }
 
@@ -1080,7 +1159,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
 {
        perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
 
-       if (static_branch(&perf_sched_events.key))
+       if (static_key_false(&perf_sched_events.key))
                __perf_event_task_sched_out(prev, next);
 }
 
@@ -1139,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data);
 # define perf_instruction_pointer(regs)        instruction_pointer(regs)
 #endif
 
+static inline bool has_branch_stack(struct perf_event *event)
+{
+       return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
+}
+
 extern int perf_output_begin(struct perf_output_handle *handle,
                             struct perf_event *event, unsigned int size);
 extern void perf_output_end(struct perf_output_handle *handle);
index 58969b2..5a710b9 100644 (file)
@@ -48,12 +48,14 @@ do { \
        barrier(); \
 } while (0)
 
-#define preempt_enable_no_resched() \
+#define sched_preempt_enable_no_resched() \
 do { \
        barrier(); \
        dec_preempt_count(); \
 } while (0)
 
+#define preempt_enable_no_resched()    sched_preempt_enable_no_resched()
+
 #define preempt_enable() \
 do { \
        preempt_enable_no_resched(); \
@@ -92,6 +94,7 @@ do { \
 #else /* !CONFIG_PREEMPT_COUNT */
 
 #define preempt_disable()              do { } while (0)
+#define sched_preempt_enable_no_resched()      do { } while (0)
 #define preempt_enable_no_resched()    do { } while (0)
 #define preempt_enable()               do { } while (0)
 
index f9abd93..0525927 100644 (file)
@@ -101,6 +101,11 @@ asmlinkage __printf(1, 2) __cold
 int printk(const char *fmt, ...);
 
 /*
+ * Special printk facility for scheduler use only, _DO_NOT_USE_ !
+ */
+__printf(1, 2) __cold int printk_sched(const char *fmt, ...);
+
+/*
  * Please don't use printk_ratelimit(), because it shares ratelimiting state
  * with all other unrelated printk_ratelimit() callsites.  Instead use
  * printk_ratelimited() or plain old __ratelimit().
@@ -127,6 +132,11 @@ int printk(const char *s, ...)
 {
        return 0;
 }
+static inline __printf(1, 2) __cold
+int printk_sched(const char *s, ...)
+{
+       return 0;
+}
 static inline int printk_ratelimit(void)
 {
        return 0;
index 81c04f4..9372174 100644 (file)
@@ -190,6 +190,33 @@ extern void rcu_idle_exit(void);
 extern void rcu_irq_enter(void);
 extern void rcu_irq_exit(void);
 
+/**
+ * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
+ * @a: Code that RCU needs to pay attention to.
+ *
+ * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden
+ * in the inner idle loop, that is, between the rcu_idle_enter() and
+ * the rcu_idle_exit() -- RCU will happily ignore any such read-side
+ * critical sections.  However, things like powertop need tracepoints
+ * in the inner idle loop.
+ *
+ * This macro provides the way out:  RCU_NONIDLE(do_something_with_RCU())
+ * will tell RCU that it needs to pay attention, invoke its argument
+ * (in this example, a call to the do_something_with_RCU() function),
+ * and then tell RCU to go back to ignoring this CPU.  It is permissible
+ * to nest RCU_NONIDLE() wrappers, but the nesting level is currently
+ * quite limited.  If deeper nesting is required, it will be necessary
+ * to adjust DYNTICK_TASK_NESTING_VALUE accordingly.
+ *
+ * This macro may be used from process-level code only.
+ */
+#define RCU_NONIDLE(a) \
+       do { \
+               rcu_idle_exit(); \
+               do { a; } while (0); \
+               rcu_idle_enter(); \
+       } while (0)
+
 /*
  * Infrastructure to implement the synchronize_() primitives in
  * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
@@ -226,6 +253,15 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
 }
 #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
+bool rcu_lockdep_current_cpu_online(void);
+#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
+static inline bool rcu_lockdep_current_cpu_online(void)
+{
+       return 1;
+}
+#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
 #ifdef CONFIG_PROVE_RCU
@@ -239,13 +275,11 @@ static inline int rcu_is_cpu_idle(void)
 
 static inline void rcu_lock_acquire(struct lockdep_map *map)
 {
-       WARN_ON_ONCE(rcu_is_cpu_idle());
        lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
 }
 
 static inline void rcu_lock_release(struct lockdep_map *map)
 {
-       WARN_ON_ONCE(rcu_is_cpu_idle());
        lock_release(map, 1, _THIS_IP_);
 }
 
@@ -270,6 +304,9 @@ extern int debug_lockdep_rcu_enabled(void);
  * occur in the same context, for example, it is illegal to invoke
  * rcu_read_unlock() in process context if the matching rcu_read_lock()
  * was invoked from within an irq handler.
+ *
+ * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * offline from an RCU perspective, so check for those as well.
  */
 static inline int rcu_read_lock_held(void)
 {
@@ -277,6 +314,8 @@ static inline int rcu_read_lock_held(void)
                return 1;
        if (rcu_is_cpu_idle())
                return 0;
+       if (!rcu_lockdep_current_cpu_online())
+               return 0;
        return lock_is_held(&rcu_lock_map);
 }
 
@@ -313,6 +352,9 @@ extern int rcu_read_lock_bh_held(void);
  * notice an extended quiescent state to other CPUs that started a grace
  * period. Otherwise we would delay any grace period as long as we run in
  * the idle task.
+ *
+ * Similarly, we avoid claiming an RCU-sched read lock held if the current
+ * CPU is offline.
  */
 #ifdef CONFIG_PREEMPT_COUNT
 static inline int rcu_read_lock_sched_held(void)
@@ -323,6 +365,8 @@ static inline int rcu_read_lock_sched_held(void)
                return 1;
        if (rcu_is_cpu_idle())
                return 0;
+       if (!rcu_lockdep_current_cpu_online())
+               return 0;
        if (debug_locks)
                lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
        return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
@@ -381,8 +425,22 @@ extern int rcu_my_thread_group_empty(void);
                }                                                       \
        } while (0)
 
+#if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU)
+static inline void rcu_preempt_sleep_check(void)
+{
+       rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
+                          "Illegal context switch in RCU read-side "
+                          "critical section");
+}
+#else /* #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) */
+static inline void rcu_preempt_sleep_check(void)
+{
+}
+#endif /* #else #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) */
+
 #define rcu_sleep_check()                                              \
        do {                                                            \
+               rcu_preempt_sleep_check();                              \
                rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),     \
                                   "Illegal context switch in RCU-bh"   \
                                   " read-side critical section");      \
@@ -470,6 +528,13 @@ extern int rcu_my_thread_group_empty(void);
  * NULL.  Although rcu_access_pointer() may also be used in cases where
  * update-side locks prevent the value of the pointer from changing, you
  * should instead use rcu_dereference_protected() for this use case.
+ *
+ * It is also permissible to use rcu_access_pointer() when read-side
+ * access to the pointer was removed at least one grace period ago, as
+ * is the case in the context of the RCU callback that is freeing up
+ * the data, or after a synchronize_rcu() returns.  This can be useful
+ * when tearing down multi-linked structures after a grace period
+ * has elapsed.
  */
 #define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu)
 
@@ -659,6 +724,8 @@ static inline void rcu_read_lock(void)
        __rcu_read_lock();
        __acquire(RCU);
        rcu_lock_acquire(&rcu_lock_map);
+       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+                          "rcu_read_lock() used illegally while idle");
 }
 
 /*
@@ -678,6 +745,8 @@ static inline void rcu_read_lock(void)
  */
 static inline void rcu_read_unlock(void)
 {
+       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+                          "rcu_read_unlock() used illegally while idle");
        rcu_lock_release(&rcu_lock_map);
        __release(RCU);
        __rcu_read_unlock();
@@ -705,6 +774,8 @@ static inline void rcu_read_lock_bh(void)
        local_bh_disable();
        __acquire(RCU_BH);
        rcu_lock_acquire(&rcu_bh_lock_map);
+       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+                          "rcu_read_lock_bh() used illegally while idle");
 }
 
 /*
@@ -714,6 +785,8 @@ static inline void rcu_read_lock_bh(void)
  */
 static inline void rcu_read_unlock_bh(void)
 {
+       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+                          "rcu_read_unlock_bh() used illegally while idle");
        rcu_lock_release(&rcu_bh_lock_map);
        __release(RCU_BH);
        local_bh_enable();
@@ -737,6 +810,8 @@ static inline void rcu_read_lock_sched(void)
        preempt_disable();
        __acquire(RCU_SCHED);
        rcu_lock_acquire(&rcu_sched_lock_map);
+       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+                          "rcu_read_lock_sched() used illegally while idle");
 }
 
 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
@@ -753,6 +828,8 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
  */
 static inline void rcu_read_unlock_sched(void)
 {
+       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+                          "rcu_read_unlock_sched() used illegally while idle");
        rcu_lock_release(&rcu_sched_lock_map);
        __release(RCU_SCHED);
        preempt_enable();
@@ -841,7 +918,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
        /* See the kfree_rcu() header comment. */
        BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));
 
-       call_rcu(head, (rcu_callback)offset);
+       kfree_call_rcu(head, (rcu_callback)offset);
 }
 
 /**
index 00b7a5e..e93df77 100644 (file)
 
 #include <linux/cache.h>
 
-#ifdef CONFIG_RCU_BOOST
 static inline void rcu_init(void)
 {
 }
-#else /* #ifdef CONFIG_RCU_BOOST */
-void rcu_init(void);
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
 
 static inline void rcu_barrier_bh(void)
 {
@@ -83,6 +79,12 @@ static inline void synchronize_sched_expedited(void)
        synchronize_sched();
 }
 
+static inline void kfree_call_rcu(struct rcu_head *head,
+                                 void (*func)(struct rcu_head *rcu))
+{
+       call_rcu(head, func);
+}
+
 #ifdef CONFIG_TINY_RCU
 
 static inline void rcu_preempt_note_context_switch(void)
index 6745846..e8ee5dd 100644 (file)
@@ -61,6 +61,24 @@ extern void synchronize_rcu_bh(void);
 extern void synchronize_sched_expedited(void);
 extern void synchronize_rcu_expedited(void);
 
+void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+
+/**
+ * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
+ *
+ * Wait for an RCU-bh grace period to elapse, but use a "big hammer"
+ * approach to force the grace period to end quickly.  This consumes
+ * significant time on all CPUs and is unfriendly to real-time workloads,
+ * and is thus not recommended for any sort of common-case code.  In fact,
+ * if you are using synchronize_rcu_bh_expedited() in a loop, please
+ * restructure your code to batch your updates, and then use a single
+ * synchronize_rcu_bh() instead.
+ *
+ * Note that it is illegal to call this function while holding any lock
+ * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
+ * to call this function from a CPU-hotplug notifier.  Failing to observe
+ * these restrictions will result in deadlock.
+ */
 static inline void synchronize_rcu_bh_expedited(void)
 {
        synchronize_sched_expedited();
@@ -83,6 +101,7 @@ extern void rcu_sched_force_quiescent_state(void);
 /* A context switch is a grace period for RCU-sched and RCU-bh. */
 static inline int rcu_blocking_is_gp(void)
 {
+       might_sleep();  /* Check for RCU read-side critical section. */
        return num_online_cpus() == 1;
 }
 
index 7d379a6..e074e1e 100644 (file)
@@ -361,6 +361,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
+extern void schedule_preempt_disabled(void);
 extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
 
 struct nsproxy;
@@ -905,6 +906,7 @@ struct sched_group_power {
         * single CPU.
         */
        unsigned int power, power_orig;
+       unsigned long next_update;
        /*
         * Number of busy cpus in this group.
         */
@@ -1052,6 +1054,8 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag)
 unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu);
 unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
 
+bool cpus_share_cache(int this_cpu, int that_cpu);
+
 #else /* CONFIG_SMP */
 
 struct sched_domain_attr;
@@ -1061,6 +1065,12 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
                        struct sched_domain_attr *dattr_new)
 {
 }
+
+static inline bool cpus_share_cache(int this_cpu, int that_cpu)
+{
+       return true;
+}
+
 #endif /* !CONFIG_SMP */
 
 
@@ -1225,6 +1235,12 @@ struct sched_rt_entity {
 #endif
 };
 
+/*
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define RR_TIMESLICE           (100 * HZ / 1000)
+
 struct rcu_node;
 
 enum perf_event_task_context {
@@ -1319,6 +1335,11 @@ struct task_struct {
        unsigned sched_reset_on_fork:1;
        unsigned sched_contributes_to_load:1;
 
+#ifdef CONFIG_GENERIC_HARDIRQS
+       /* IRQ handler threads */
+       unsigned irq_thread:1;
+#endif
+
        pid_t pid;
        pid_t tgid;
 
@@ -1427,11 +1448,6 @@ struct task_struct {
  * mempolicy */
        spinlock_t alloc_lock;
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-       /* IRQ handler threads */
-       struct irqaction *irqaction;
-#endif
-
        /* Protection of the PI data structures: */
        raw_spinlock_t pi_lock;
 
@@ -1777,7 +1793,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 /*
  * Per process flags
  */
-#define PF_STARTING    0x00000002      /* being created */
 #define PF_EXITING     0x00000004      /* getting shut down */
 #define PF_EXITPIDONE  0x00000008      /* pi exit done on shut down */
 #define PF_VCPU                0x00000010      /* I'm a virtual CPU */
@@ -1864,8 +1879,7 @@ extern void task_clear_jobctl_pending(struct task_struct *task,
 #ifdef CONFIG_PREEMPT_RCU
 
 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
-#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */
-#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */
+#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
 
 static inline void rcu_copy_process(struct task_struct *p)
 {
@@ -2049,7 +2063,7 @@ extern void sched_autogroup_fork(struct signal_struct *sig);
 extern void sched_autogroup_exit(struct signal_struct *sig);
 #ifdef CONFIG_PROC_FS
 extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
-extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice);
+extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
 #endif
 #else
 static inline void sched_autogroup_create_attach(struct task_struct *p) { }
@@ -2066,12 +2080,20 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice;
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
 extern void rt_mutex_adjust_pi(struct task_struct *p);
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+       return tsk->pi_blocked_on != NULL;
+}
 #else
 static inline int rt_mutex_getprio(struct task_struct *p)
 {
        return p->normal_prio;
 }
 # define rt_mutex_adjust_pi(p)         do { } while (0)
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+       return false;
+}
 #endif
 
 extern bool yield_to(struct task_struct *p, bool preempt);
@@ -2371,7 +2393,7 @@ static inline int thread_group_empty(struct task_struct *p)
  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
  * pins the final release of task.io_context.  Also protects ->cpuset and
- * ->cgroup.subsys[].
+ * ->cgroup.subsys[]. And ->vfork_done.
  *
  * Nests both inside and outside of read_lock(&tasklist_lock).
  * It must not be nested with write_lock_irq(&tasklist_lock),
@@ -2390,12 +2412,15 @@ static inline void task_unlock(struct task_struct *p)
 extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
                                                        unsigned long *flags);
 
-#define lock_task_sighand(tsk, flags)                                  \
-({     struct sighand_struct *__ss;                                    \
-       __cond_lock(&(tsk)->sighand->siglock,                           \
-                   (__ss = __lock_task_sighand(tsk, flags)));          \
-       __ss;                                                           \
-})                                                                     \
+static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
+                                                      unsigned long *flags)
+{
+       struct sighand_struct *ret;
+
+       ret = __lock_task_sighand(tsk, flags);
+       (void)__cond_lock(&tsk->sighand->siglock, ret);
+       return ret;
+}
 
 static inline void unlock_task_sighand(struct task_struct *tsk,
                                                unsigned long *flags)
index e1b0059..d3d5fa5 100644 (file)
@@ -99,15 +99,18 @@ long srcu_batches_completed(struct srcu_struct *sp);
  * power mode. This way we can notice an extended quiescent state to
  * other CPUs that started a grace period. Otherwise we would delay any
  * grace period as long as we run in the idle task.
+ *
+ * Similarly, we avoid claiming an SRCU read lock held if the current
+ * CPU is offline.
  */
 static inline int srcu_read_lock_held(struct srcu_struct *sp)
 {
-       if (rcu_is_cpu_idle())
-               return 0;
-
        if (!debug_lockdep_rcu_enabled())
                return 1;
-
+       if (rcu_is_cpu_idle())
+               return 0;
+       if (!rcu_lockdep_current_cpu_online())
+               return 0;
        return lock_is_held(&sp->dep_map);
 }
 
@@ -169,6 +172,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
        int retval = __srcu_read_lock(sp);
 
        rcu_lock_acquire(&(sp)->dep_map);
+       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+                          "srcu_read_lock() used illegally while idle");
        return retval;
 }
 
@@ -182,6 +187,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
        __releases(sp)
 {
+       rcu_lockdep_assert(!rcu_is_cpu_idle(),
+                          "srcu_read_unlock() used illegally while idle");
        rcu_lock_release(&(sp)->dep_map);
        __srcu_read_unlock(sp, idx);
 }
diff --git a/include/linux/static_key.h b/include/linux/static_key.h
new file mode 100644 (file)
index 0000000..27bd3f8
--- /dev/null
@@ -0,0 +1 @@
+#include <linux/jump_label.h>
index 46a85c9..3c7ffdb 100644 (file)
@@ -412,7 +412,8 @@ struct tcp_sock {
 
        struct tcp_sack_block recv_sack_cache[4];
 
-       struct sk_buff *highest_sack;   /* highest skb with SACK received
+       struct sk_buff *highest_sack;   /* skb just after the highest
+                                        * skb with SACKed bit set
                                         * (validity guaranteed only if
                                         * sacked_out > 0)
                                         */
index aa60fe7..b75e186 100644 (file)
@@ -234,23 +234,9 @@ struct timex {
 extern unsigned long tick_usec;                /* USER_HZ period (usec) */
 extern unsigned long tick_nsec;                /* ACTHZ          period (nsec) */
 
-/*
- * phase-lock loop variables
- */
-extern int time_status;                /* clock synchronization status bits */
-
 extern void ntp_init(void);
 extern void ntp_clear(void);
 
-/**
- * ntp_synced - Returns 1 if the NTP status is not UNSYNC
- *
- */
-static inline int ntp_synced(void)
-{
-       return !(time_status & STA_UNSYNC);
-}
-
 /* Required to safely shift negative values */
 #define shift_right(x, s) ({   \
        __typeof__(x) __x = (x);        \
@@ -264,10 +250,9 @@ static inline int ntp_synced(void)
 #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
 
 /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
-extern u64 tick_length;
+extern u64 ntp_tick_length(void);
 
 extern void second_overflow(void);
-extern void update_ntp_one_tick(void);
 extern int do_adjtimex(struct timex *);
 extern void hardpps(const struct timespec *, const struct timespec *);
 
index df0a779..bd96ecd 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/rcupdate.h>
-#include <linux/jump_label.h>
+#include <linux/static_key.h>
 
 struct module;
 struct tracepoint;
@@ -29,7 +29,7 @@ struct tracepoint_func {
 
 struct tracepoint {
        const char *name;               /* Tracepoint name */
-       struct jump_label_key key;
+       struct static_key key;
        void (*regfunc)(void);
        void (*unregfunc)(void);
        struct tracepoint_func __rcu *funcs;
@@ -114,7 +114,7 @@ static inline void tracepoint_synchronize_unregister(void)
  * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
  * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
  */
-#define __DO_TRACE(tp, proto, args, cond)                              \
+#define __DO_TRACE(tp, proto, args, cond, prercu, postrcu)             \
        do {                                                            \
                struct tracepoint_func *it_func_ptr;                    \
                void *it_func;                                          \
@@ -122,6 +122,7 @@ static inline void tracepoint_synchronize_unregister(void)
                                                                        \
                if (!(cond))                                            \
                        return;                                         \
+               prercu;                                                 \
                rcu_read_lock_sched_notrace();                          \
                it_func_ptr = rcu_dereference_sched((tp)->funcs);       \
                if (it_func_ptr) {                                      \
@@ -132,6 +133,7 @@ static inline void tracepoint_synchronize_unregister(void)
                        } while ((++it_func_ptr)->func);                \
                }                                                       \
                rcu_read_unlock_sched_notrace();                        \
+               postrcu;                                                \
        } while (0)
 
 /*
@@ -139,15 +141,25 @@ static inline void tracepoint_synchronize_unregister(void)
  * not add unwanted padding between the beginning of the section and the
  * structure. Force alignment to the same alignment as the section start.
  */
-#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args)        \
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
        extern struct tracepoint __tracepoint_##name;                   \
        static inline void trace_##name(proto)                          \
        {                                                               \
+               if (static_key_false(&__tracepoint_##name.key))         \
+                       __DO_TRACE(&__tracepoint_##name,                \
+                               TP_PROTO(data_proto),                   \
+                               TP_ARGS(data_args),                     \
+                               TP_CONDITION(cond),,);                  \
+       }                                                               \
+       static inline void trace_##name##_rcuidle(proto)                \
+       {                                                               \
                if (static_branch(&__tracepoint_##name.key))            \
                        __DO_TRACE(&__tracepoint_##name,                \
                                TP_PROTO(data_proto),                   \
                                TP_ARGS(data_args),                     \
-                               TP_CONDITION(cond));                    \
+                               TP_CONDITION(cond),                     \
+                               rcu_idle_exit(),                        \
+                               rcu_idle_enter());                      \
        }                                                               \
        static inline int                                               \
        register_trace_##name(void (*probe)(data_proto), void *data)    \
@@ -176,7 +188,7 @@ static inline void tracepoint_synchronize_unregister(void)
        __attribute__((section("__tracepoints_strings"))) = #name;       \
        struct tracepoint __tracepoint_##name                            \
        __attribute__((section("__tracepoints"))) =                      \
-               { __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\
+               { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
        static struct tracepoint * const __tracepoint_ptr_##name __used  \
        __attribute__((section("__tracepoints_ptrs"))) =                 \
                &__tracepoint_##name;
@@ -190,9 +202,11 @@ static inline void tracepoint_synchronize_unregister(void)
        EXPORT_SYMBOL(__tracepoint_##name)
 
 #else /* !CONFIG_TRACEPOINTS */
-#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args)        \
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
        static inline void trace_##name(proto)                          \
        { }                                                             \
+       static inline void trace_##name##_rcuidle(proto)                \
+       { }                                                             \
        static inline int                                               \
        register_trace_##name(void (*probe)(data_proto),                \
                              void *data)                               \
index a9ce45e..7d9a9e9 100644 (file)
@@ -157,7 +157,7 @@ void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
                        void *key);
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
+void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_bit(wait_queue_head_t *, void *, int);
 int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
@@ -170,7 +170,8 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 #define wake_up(x)                     __wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)              __wake_up(x, TASK_NORMAL, nr, NULL)
 #define wake_up_all(x)                 __wake_up(x, TASK_NORMAL, 0, NULL)
-#define wake_up_locked(x)              __wake_up_locked((x), TASK_NORMAL)
+#define wake_up_locked(x)              __wake_up_locked((x), TASK_NORMAL, 1)
+#define wake_up_all_locked(x)          __wake_up_locked((x), TASK_NORMAL, 0)
 
 #define wake_up_interruptible(x)       __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
 #define wake_up_interruptible_nr(x, nr)        __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
index eb8b9f1..af15545 100644 (file)
@@ -289,12 +289,16 @@ enum {
  *
  * system_freezable_wq is equivalent to system_wq except that it's
  * freezable.
+ *
+ * system_nrt_freezable_wq is equivalent to system_nrt_wq except that
+ * it's freezable.
  */
 extern struct workqueue_struct *system_wq;
 extern struct workqueue_struct *system_long_wq;
 extern struct workqueue_struct *system_nrt_wq;
 extern struct workqueue_struct *system_unbound_wq;
 extern struct workqueue_struct *system_freezable_wq;
+extern struct workqueue_struct *system_nrt_freezable_wq;
 
 extern struct workqueue_struct *
 __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
index 06b795d..b94765e 100644 (file)
@@ -35,12 +35,12 @@ struct inet_peer {
 
        u32                     metrics[RTAX_MAX];
        u32                     rate_tokens;    /* rate limiting for ICMP */
-       int                     redirect_genid;
        unsigned long           rate_last;
        unsigned long           pmtu_expires;
        u32                     pmtu_orig;
        u32                     pmtu_learned;
        struct inetpeer_addr_base redirect_learned;
+       struct list_head        gc_list;
        /*
         * Once inet_peer is queued for deletion (refcnt == -1), following fields
         * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
@@ -96,6 +96,8 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr,
 extern void inet_putpeer(struct inet_peer *p);
 extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 
+extern void inetpeer_invalidate_tree(int family);
+
 /*
  * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
  * tcp_ts_stamp if no refcount is taken on inet_peer
index 91c1c8b..dcde2d9 100644 (file)
@@ -55,7 +55,7 @@
 #include <linux/uaccess.h>
 #include <linux/memcontrol.h>
 #include <linux/res_counter.h>
-#include <linux/jump_label.h>
+#include <linux/static_key.h>
 
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
@@ -924,13 +924,13 @@ inline void sk_refcnt_debug_release(const struct sock *sk)
 #endif /* SOCK_REFCNT_DEBUG */
 
 #if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET)
-extern struct jump_label_key memcg_socket_limit_enabled;
+extern struct static_key memcg_socket_limit_enabled;
 static inline struct cg_proto *parent_cg_proto(struct proto *proto,
                                               struct cg_proto *cg_proto)
 {
        return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
 }
-#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled)
+#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled)
 #else
 #define mem_cgroup_sockets_enabled 0
 static inline struct cg_proto *parent_cg_proto(struct proto *proto,
index 42c29bf..2d80c29 100644 (file)
@@ -1364,8 +1364,9 @@ static inline void tcp_push_pending_frames(struct sock *sk)
        }
 }
 
-/* Start sequence of the highest skb with SACKed bit, valid only if
- * sacked > 0 or when the caller has ensured validity by itself.
+/* Start sequence of the skb just after the highest skb with SACKed
+ * bit, valid only if sacked_out > 0 or when the caller has ensured
+ * validity by itself.
  */
 static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
 {
index 1bcc2a8..14b3894 100644 (file)
@@ -151,6 +151,8 @@ enum {
    events get removed */
 static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {};
 static inline void trace_power_end(u64 cpuid) {};
+static inline void trace_power_start_rcuidle(u64 type, u64 state, u64 cpuid) {};
+static inline void trace_power_end_rcuidle(u64 cpuid) {};
 static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {};
 #endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */
 
diff --git a/include/trace/events/printk.h b/include/trace/events/printk.h
new file mode 100644 (file)
index 0000000..94ec79c
--- /dev/null
@@ -0,0 +1,41 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM printk
+
+#if !defined(_TRACE_PRINTK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PRINTK_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT_CONDITION(console,
+       TP_PROTO(const char *log_buf, unsigned start, unsigned end,
+                unsigned log_buf_len),
+
+       TP_ARGS(log_buf, start, end, log_buf_len),
+
+       TP_CONDITION(start != end),
+
+       TP_STRUCT__entry(
+               __dynamic_array(char, msg, end - start + 1)
+       ),
+
+       TP_fast_assign(
+               if ((start & (log_buf_len - 1)) > (end & (log_buf_len - 1))) {
+                       memcpy(__get_dynamic_array(msg),
+                              log_buf + (start & (log_buf_len - 1)),
+                              log_buf_len - (start & (log_buf_len - 1)));
+                       memcpy((char *)__get_dynamic_array(msg) +
+                              log_buf_len - (start & (log_buf_len - 1)),
+                              log_buf, end & (log_buf_len - 1));
+               } else
+                       memcpy(__get_dynamic_array(msg),
+                              log_buf + (start & (log_buf_len - 1)),
+                              end - start);
+               ((char *)__get_dynamic_array(msg))[end - start] = 0;
+       ),
+
+       TP_printk("%s", __get_str(msg))
+);
+#endif /* _TRACE_PRINTK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
index d2d88be..3370997 100644 (file)
@@ -313,19 +313,22 @@ TRACE_EVENT(rcu_prep_idle,
 /*
  * Tracepoint for the registration of a single RCU callback function.
  * The first argument is the type of RCU, the second argument is
- * a pointer to the RCU callback itself, and the third element is the
- * new RCU callback queue length for the current CPU.
+ * a pointer to the RCU callback itself, the third element is the
+ * number of lazy callbacks queued, and the fourth element is the
+ * total number of callbacks queued.
  */
 TRACE_EVENT(rcu_callback,
 
-       TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen),
+       TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen_lazy,
+                long qlen),
 
-       TP_ARGS(rcuname, rhp, qlen),
+       TP_ARGS(rcuname, rhp, qlen_lazy, qlen),
 
        TP_STRUCT__entry(
                __field(char *, rcuname)
                __field(void *, rhp)
                __field(void *, func)
+               __field(long, qlen_lazy)
                __field(long, qlen)
        ),
 
@@ -333,11 +336,13 @@ TRACE_EVENT(rcu_callback,
                __entry->rcuname = rcuname;
                __entry->rhp = rhp;
                __entry->func = rhp->func;
+               __entry->qlen_lazy = qlen_lazy;
                __entry->qlen = qlen;
        ),
 
-       TP_printk("%s rhp=%p func=%pf %ld",
-                 __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen)
+       TP_printk("%s rhp=%p func=%pf %ld/%ld",
+                 __entry->rcuname, __entry->rhp, __entry->func,
+                 __entry->qlen_lazy, __entry->qlen)
 );
 
 /*
@@ -345,20 +350,21 @@ TRACE_EVENT(rcu_callback,
  * kfree() form.  The first argument is the RCU type, the second argument
  * is a pointer to the RCU callback, the third argument is the offset
  * of the callback within the enclosing RCU-protected data structure,
- * and the fourth argument is the new RCU callback queue length for the
- * current CPU.
+ * the fourth argument is the number of lazy callbacks queued, and the
+ * fifth argument is the total number of callbacks queued.
  */
 TRACE_EVENT(rcu_kfree_callback,
 
        TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset,
-                long qlen),
+                long qlen_lazy, long qlen),
 
-       TP_ARGS(rcuname, rhp, offset, qlen),
+       TP_ARGS(rcuname, rhp, offset, qlen_lazy, qlen),
 
        TP_STRUCT__entry(
                __field(char *, rcuname)
                __field(void *, rhp)
                __field(unsigned long, offset)
+               __field(long, qlen_lazy)
                __field(long, qlen)
        ),
 
@@ -366,41 +372,45 @@ TRACE_EVENT(rcu_kfree_callback,
                __entry->rcuname = rcuname;
                __entry->rhp = rhp;
                __entry->offset = offset;
+               __entry->qlen_lazy = qlen_lazy;
                __entry->qlen = qlen;
        ),
 
-       TP_printk("%s rhp=%p func=%ld %ld",
+       TP_printk("%s rhp=%p func=%ld %ld/%ld",
                  __entry->rcuname, __entry->rhp, __entry->offset,
-                 __entry->qlen)
+                 __entry->qlen_lazy, __entry->qlen)
 );
 
 /*
  * Tracepoint for marking the beginning rcu_do_batch, performed to start
  * RCU callback invocation.  The first argument is the RCU flavor,
- * the second is the total number of callbacks (including those that
- * are not yet ready to be invoked), and the third argument is the
- * current RCU-callback batch limit.
+ * the second is the number of lazy callbacks queued, the third is
+ * the total number of callbacks queued, and the fourth argument is
+ * the current RCU-callback batch limit.
  */
 TRACE_EVENT(rcu_batch_start,
 
-       TP_PROTO(char *rcuname, long qlen, int blimit),
+       TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit),
 
-       TP_ARGS(rcuname, qlen, blimit),
+       TP_ARGS(rcuname, qlen_lazy, qlen, blimit),
 
        TP_STRUCT__entry(
                __field(char *, rcuname)
+               __field(long, qlen_lazy)
                __field(long, qlen)
                __field(int, blimit)
        ),
 
        TP_fast_assign(
                __entry->rcuname = rcuname;
+               __entry->qlen_lazy = qlen_lazy;
                __entry->qlen = qlen;
                __entry->blimit = blimit;
        ),
 
-       TP_printk("%s CBs=%ld bl=%d",
-                 __entry->rcuname, __entry->qlen, __entry->blimit)
+       TP_printk("%s CBs=%ld/%ld bl=%d",
+                 __entry->rcuname, __entry->qlen_lazy, __entry->qlen,
+                 __entry->blimit)
 );
 
 /*
@@ -531,16 +541,21 @@ TRACE_EVENT(rcu_torture_read,
 #else /* #ifdef CONFIG_RCU_TRACE */
 
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
-#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0)
+#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
+                                   qsmask) do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
-#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0)
+#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
+                                        grplo, grphi, gp_tasks) do { } \
+       while (0)
 #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
 #define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0)
 #define trace_rcu_prep_idle(reason) do { } while (0)
-#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0)
-#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0)
-#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0)
+#define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0)
+#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \
+       do { } while (0)
+#define trace_rcu_batch_start(rcuname, qlen_lazy, qlen, blimit) \
+       do { } while (0)
 #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0)
 #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
 #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
index e33ed1b..fbc7b1a 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <linux/sched.h>
 #include <linux/tracepoint.h>
+#include <linux/binfmts.h>
 
 /*
  * Tracepoint for calling kthread_stop, performed to end a kthread:
@@ -276,6 +277,32 @@ TRACE_EVENT(sched_process_fork,
 );
 
 /*
+ * Tracepoint for exec:
+ */
+TRACE_EVENT(sched_process_exec,
+
+       TP_PROTO(struct task_struct *p, pid_t old_pid,
+                struct linux_binprm *bprm),
+
+       TP_ARGS(p, old_pid, bprm),
+
+       TP_STRUCT__entry(
+               __string(       filename,       bprm->filename  )
+               __field(        pid_t,          pid             )
+               __field(        pid_t,          old_pid         )
+       ),
+
+       TP_fast_assign(
+               __assign_str(filename, bprm->filename);
+               __entry->pid            = p->pid;
+               __entry->old_pid        = old_pid;      /* not p->pid */
+       ),
+
+       TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
+                 __entry->pid, __entry->old_pid)
+);
+
+/*
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
  */
index 17df434..39a8a43 100644 (file)
                }                                               \
        } while (0)
 
+#ifndef TRACE_HEADER_MULTI_READ
+enum {
+       TRACE_SIGNAL_DELIVERED,
+       TRACE_SIGNAL_IGNORED,
+       TRACE_SIGNAL_ALREADY_PENDING,
+       TRACE_SIGNAL_OVERFLOW_FAIL,
+       TRACE_SIGNAL_LOSE_INFO,
+};
+#endif
+
 /**
  * signal_generate - called when a signal is generated
  * @sig: signal number
  * @info: pointer to struct siginfo
  * @task: pointer to struct task_struct
+ * @group: shared or private
+ * @result: TRACE_SIGNAL_*
  *
  * Current process sends a 'sig' signal to 'task' process with
  * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV,
  */
 TRACE_EVENT(signal_generate,
 
-       TP_PROTO(int sig, struct siginfo *info, struct task_struct *task),
+       TP_PROTO(int sig, struct siginfo *info, struct task_struct *task,
+                       int group, int result),
 
-       TP_ARGS(sig, info, task),
+       TP_ARGS(sig, info, task, group, result),
 
        TP_STRUCT__entry(
                __field(        int,    sig                     )
@@ -47,6 +60,8 @@ TRACE_EVENT(signal_generate,
                __field(        int,    code                    )
                __array(        char,   comm,   TASK_COMM_LEN   )
                __field(        pid_t,  pid                     )
+               __field(        int,    group                   )
+               __field(        int,    result                  )
        ),
 
        TP_fast_assign(
@@ -54,11 +69,14 @@ TRACE_EVENT(signal_generate,
                TP_STORE_SIGINFO(__entry, info);
                memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
                __entry->pid    = task->pid;
+               __entry->group  = group;
+               __entry->result = result;
        ),
 
-       TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d",
+       TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d",
                  __entry->sig, __entry->errno, __entry->code,
-                 __entry->comm, __entry->pid)
+                 __entry->comm, __entry->pid, __entry->group,
+                 __entry->result)
 );
 
 /**
@@ -101,65 +119,6 @@ TRACE_EVENT(signal_deliver,
                  __entry->sa_handler, __entry->sa_flags)
 );
 
-DECLARE_EVENT_CLASS(signal_queue_overflow,
-
-       TP_PROTO(int sig, int group, struct siginfo *info),
-
-       TP_ARGS(sig, group, info),
-
-       TP_STRUCT__entry(
-               __field(        int,    sig     )
-               __field(        int,    group   )
-               __field(        int,    errno   )
-               __field(        int,    code    )
-       ),
-
-       TP_fast_assign(
-               __entry->sig    = sig;
-               __entry->group  = group;
-               TP_STORE_SIGINFO(__entry, info);
-       ),
-
-       TP_printk("sig=%d group=%d errno=%d code=%d",
-                 __entry->sig, __entry->group, __entry->errno, __entry->code)
-);
-
-/**
- * signal_overflow_fail - called when signal queue is overflow
- * @sig: signal number
- * @group: signal to process group or not (bool)
- * @info: pointer to struct siginfo
- *
- * Kernel fails to generate 'sig' signal with 'info' siginfo, because
- * siginfo queue is overflow, and the signal is dropped.
- * 'group' is not 0 if the signal will be sent to a process group.
- * 'sig' is always one of RT signals.
- */
-DEFINE_EVENT(signal_queue_overflow, signal_overflow_fail,
-
-       TP_PROTO(int sig, int group, struct siginfo *info),
-
-       TP_ARGS(sig, group, info)
-);
-
-/**
- * signal_lose_info - called when siginfo is lost
- * @sig: signal number
- * @group: signal to process group or not (bool)
- * @info: pointer to struct siginfo
- *
- * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo
- * queue is overflow.
- * 'group' is not 0 if the signal will be sent to a process group.
- * 'sig' is always one of non-RT signals.
- */
-DEFINE_EVENT(signal_queue_overflow, signal_lose_info,
-
-       TP_PROTO(int sig, int group, struct siginfo *info),
-
-       TP_ARGS(sig, group, info)
-);
-
 #endif /* _TRACE_SIGNAL_H */
 
 /* This part must be outside protection */
index 3f42cd6..72f33fa 100644 (file)
@@ -438,15 +438,6 @@ config PREEMPT_RCU
          This option enables preemptible-RCU code that is common between
          the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
 
-config RCU_TRACE
-       bool "Enable tracing for RCU"
-       help
-         This option provides tracing in RCU which presents stats
-         in debugfs for debugging RCU implementation.
-
-         Say Y here if you want to enable RCU tracing
-         Say N if you are unsure.
-
 config RCU_FANOUT
        int "Tree-based hierarchical RCU fanout value"
        range 2 64 if 64BIT
index ff49a6d..4990f7e 100644 (file)
@@ -374,11 +374,8 @@ static noinline void __init_refok rest_init(void)
         * at least once to get things moving:
         */
        init_idle_bootup_task(current);
-       preempt_enable_no_resched();
-       schedule();
-
+       schedule_preempt_disabled();
        /* Call into cpu_idle with preempt disabled */
-       preempt_disable();
        cpu_idle();
 }
 
index 1b5c081..c61234b 100644 (file)
@@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
                       PERF_FLAG_FD_OUTPUT  |\
                       PERF_FLAG_PID_CGROUP)
 
+/*
+ * branch priv levels that need permission checks
+ */
+#define PERF_SAMPLE_BRANCH_PERM_PLM \
+       (PERF_SAMPLE_BRANCH_KERNEL |\
+        PERF_SAMPLE_BRANCH_HV)
+
 enum event_type_t {
        EVENT_FLEXIBLE = 0x1,
        EVENT_PINNED = 0x2,
@@ -128,8 +135,9 @@ enum event_type_t {
  * perf_sched_events : >0 events exist
  * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
  */
-struct jump_label_key_deferred perf_sched_events __read_mostly;
+struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
+static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
        if (is_cgroup_event(event))
                ctx->nr_cgroups++;
 
+       if (has_branch_stack(event))
+               ctx->nr_branch_stack++;
+
        list_add_rcu(&event->event_entry, &ctx->event_list);
        if (!ctx->nr_events)
                perf_pmu_rotate_start(ctx->pmu);
@@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
                        cpuctx->cgrp = NULL;
        }
 
+       if (has_branch_stack(event))
+               ctx->nr_branch_stack--;
+
        ctx->nr_events--;
        if (event->attr.inherit_stat)
                ctx->nr_stat--;
@@ -2195,6 +2209,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 }
 
 /*
+ * When sampling the branch stack in system-wide, it may be necessary
+ * to flush the stack on context switch. This happens when the branch
+ * stack does not tag its entries with the pid of the current task.
+ * Otherwise it becomes impossible to associate a branch entry with a
+ * task. This ambiguity is more likely to appear when the branch stack
+ * supports priv level filtering and the user sets it to monitor only
+ * at the user level (which could be a useful measurement in system-wide
+ * mode). In that case, the risk is high of having a branch stack with
+ * branches from multiple tasks. Flushing may mean dropping the existing
+ * entries or stashing them somewhere in the PMU specific code layer.
+ *
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when there is at least one system-wide context
+ * with at least one active event using taken branch sampling.
+ */
+static void perf_branch_stack_sched_in(struct task_struct *prev,
+                                      struct task_struct *task)
+{
+       struct perf_cpu_context *cpuctx;
+       struct pmu *pmu;
+       unsigned long flags;
+
+       /* no need to flush branch stack if not changing task */
+       if (prev == task)
+               return;
+
+       local_irq_save(flags);
+
+       rcu_read_lock();
+
+       list_for_each_entry_rcu(pmu, &pmus, entry) {
+               cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+               /*
+                * check if the context has at least one
+                * event using PERF_SAMPLE_BRANCH_STACK
+                */
+               if (cpuctx->ctx.nr_branch_stack > 0
+                   && pmu->flush_branch_stack) {
+
+                       pmu = cpuctx->ctx.pmu;
+
+                       perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+                       perf_pmu_disable(pmu);
+
+                       pmu->flush_branch_stack();
+
+                       perf_pmu_enable(pmu);
+
+                       perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+               }
+       }
+
+       rcu_read_unlock();
+
+       local_irq_restore(flags);
+}
+
+/*
  * Called from scheduler to add the events of the current task
  * with interrupts disabled.
  *
@@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev,
         */
        if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
                perf_cgroup_sched_in(prev, task);
+
+       /* check for system-wide branch_stack events */
+       if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
+               perf_branch_stack_sched_in(prev, task);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -2778,7 +2856,7 @@ static void free_event(struct perf_event *event)
 
        if (!event->parent) {
                if (event->attach_state & PERF_ATTACH_TASK)
-                       jump_label_dec_deferred(&perf_sched_events);
+                       static_key_slow_dec_deferred(&perf_sched_events);
                if (event->attr.mmap || event->attr.mmap_data)
                        atomic_dec(&nr_mmap_events);
                if (event->attr.comm)
@@ -2789,7 +2867,15 @@ static void free_event(struct perf_event *event)
                        put_callchain_buffers();
                if (is_cgroup_event(event)) {
                        atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
-                       jump_label_dec_deferred(&perf_sched_events);
+                       static_key_slow_dec_deferred(&perf_sched_events);
+               }
+
+               if (has_branch_stack(event)) {
+                       static_key_slow_dec_deferred(&perf_sched_events);
+                       /* is system-wide event */
+                       if (!(event->attach_state & PERF_ATTACH_TASK))
+                               atomic_dec(&per_cpu(perf_branch_stack_events,
+                                                   event->cpu));
                }
        }
 
@@ -3238,10 +3324,6 @@ int perf_event_task_disable(void)
        return 0;
 }
 
-#ifndef PERF_EVENT_INDEX_OFFSET
-# define PERF_EVENT_INDEX_OFFSET 0
-#endif
-
 static int perf_event_index(struct perf_event *event)
 {
        if (event->hw.state & PERF_HES_STOPPED)
@@ -3250,21 +3332,26 @@ static int perf_event_index(struct perf_event *event)
        if (event->state != PERF_EVENT_STATE_ACTIVE)
                return 0;
 
-       return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
+       return event->pmu->event_idx(event);
 }
 
 static void calc_timer_values(struct perf_event *event,
+                               u64 *now,
                                u64 *enabled,
                                u64 *running)
 {
-       u64 now, ctx_time;
+       u64 ctx_time;
 
-       now = perf_clock();
-       ctx_time = event->shadow_ctx_time + now;
+       *now = perf_clock();
+       ctx_time = event->shadow_ctx_time + *now;
        *enabled = ctx_time - event->tstamp_enabled;
        *running = ctx_time - event->tstamp_running;
 }
 
+void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+}
+
 /*
  * Callers need to ensure there can be no nesting of this function, otherwise
  * the seqlock logic goes bad. We can not serialize this because the arch
@@ -3274,7 +3361,7 @@ void perf_event_update_userpage(struct perf_event *event)
 {
        struct perf_event_mmap_page *userpg;
        struct ring_buffer *rb;
-       u64 enabled, running;
+       u64 enabled, running, now;
 
        rcu_read_lock();
        /*
@@ -3286,7 +3373,7 @@ void perf_event_update_userpage(struct perf_event *event)
         * because of locking issue as we can be called in
         * NMI context
         */
-       calc_timer_values(event, &enabled, &running);
+       calc_timer_values(event, &now, &enabled, &running);
        rb = rcu_dereference(event->rb);
        if (!rb)
                goto unlock;
@@ -3302,7 +3389,7 @@ void perf_event_update_userpage(struct perf_event *event)
        barrier();
        userpg->index = perf_event_index(event);
        userpg->offset = perf_event_count(event);
-       if (event->state == PERF_EVENT_STATE_ACTIVE)
+       if (userpg->index)
                userpg->offset -= local64_read(&event->hw.prev_count);
 
        userpg->time_enabled = enabled +
@@ -3311,6 +3398,8 @@ void perf_event_update_userpage(struct perf_event *event)
        userpg->time_running = running +
                        atomic64_read(&event->child_total_time_running);
 
+       perf_update_user_clock(userpg, now);
+
        barrier();
        ++userpg->lock;
        preempt_enable();
@@ -3568,6 +3657,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        event->mmap_user = get_current_user();
        vma->vm_mm->pinned_vm += event->mmap_locked;
 
+       perf_event_update_userpage(event);
+
 unlock:
        if (!ret)
                atomic_inc(&event->mmap_count);
@@ -3799,7 +3890,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 static void perf_output_read(struct perf_output_handle *handle,
                             struct perf_event *event)
 {
-       u64 enabled = 0, running = 0;
+       u64 enabled = 0, running = 0, now;
        u64 read_format = event->attr.read_format;
 
        /*
@@ -3812,7 +3903,7 @@ static void perf_output_read(struct perf_output_handle *handle,
         * NMI context
         */
        if (read_format & PERF_FORMAT_TOTAL_TIMES)
-               calc_timer_values(event, &enabled, &running);
+               calc_timer_values(event, &now, &enabled, &running);
 
        if (event->attr.read_format & PERF_FORMAT_GROUP)
                perf_output_read_group(handle, event, enabled, running);
@@ -3902,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle,
                        }
                }
        }
+
+       if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+               if (data->br_stack) {
+                       size_t size;
+
+                       size = data->br_stack->nr
+                            * sizeof(struct perf_branch_entry);
+
+                       perf_output_put(handle, data->br_stack->nr);
+                       perf_output_copy(handle, data->br_stack->entries, size);
+               } else {
+                       /*
+                        * we always store at least the value of nr
+                        */
+                       u64 nr = 0;
+                       perf_output_put(handle, nr);
+               }
+       }
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
@@ -3944,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header,
                WARN_ON_ONCE(size & (sizeof(u64)-1));
                header->size += size;
        }
+
+       if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+               int size = sizeof(u64); /* nr */
+               if (data->br_stack) {
+                       size += data->br_stack->nr
+                             * sizeof(struct perf_branch_entry);
+               }
+               header->size += size;
+       }
 }
 
 static void perf_event_output(struct perf_event *event,
@@ -4986,7 +5104,7 @@ fail:
        return err;
 }
 
-struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
+struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 static void sw_perf_event_destroy(struct perf_event *event)
 {
@@ -4994,7 +5112,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
 
        WARN_ON(event->parent);
 
-       jump_label_dec(&perf_swevent_enabled[event_id]);
+       static_key_slow_dec(&perf_swevent_enabled[event_id]);
        swevent_hlist_put(event);
 }
 
@@ -5005,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event)
        if (event->attr.type != PERF_TYPE_SOFTWARE)
                return -ENOENT;
 
+       /*
+        * no branch sampling for software events
+        */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
        switch (event_id) {
        case PERF_COUNT_SW_CPU_CLOCK:
        case PERF_COUNT_SW_TASK_CLOCK:
@@ -5024,13 +5148,18 @@ static int perf_swevent_init(struct perf_event *event)
                if (err)
                        return err;
 
-               jump_label_inc(&perf_swevent_enabled[event_id]);
+               static_key_slow_inc(&perf_swevent_enabled[event_id]);
                event->destroy = sw_perf_event_destroy;
        }
 
        return 0;
 }
 
+static int perf_swevent_event_idx(struct perf_event *event)
+{
+       return 0;
+}
+
 static struct pmu perf_swevent = {
        .task_ctx_nr    = perf_sw_context,
 
@@ -5040,6 +5169,8 @@ static struct pmu perf_swevent = {
        .start          = perf_swevent_start,
        .stop           = perf_swevent_stop,
        .read           = perf_swevent_read,
+
+       .event_idx      = perf_swevent_event_idx,
 };
 
 #ifdef CONFIG_EVENT_TRACING
@@ -5108,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event)
        if (event->attr.type != PERF_TYPE_TRACEPOINT)
                return -ENOENT;
 
+       /*
+        * no branch sampling for tracepoint events
+        */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
        err = perf_trace_init(event);
        if (err)
                return err;
@@ -5126,6 +5263,8 @@ static struct pmu perf_tracepoint = {
        .start          = perf_swevent_start,
        .stop           = perf_swevent_stop,
        .read           = perf_swevent_read,
+
+       .event_idx      = perf_swevent_event_idx,
 };
 
 static inline void perf_tp_register(void)
@@ -5331,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event)
        if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
                return -ENOENT;
 
+       /*
+        * no branch sampling for software events
+        */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
        perf_swevent_init_hrtimer(event);
 
        return 0;
@@ -5345,6 +5490,8 @@ static struct pmu perf_cpu_clock = {
        .start          = cpu_clock_event_start,
        .stop           = cpu_clock_event_stop,
        .read           = cpu_clock_event_read,
+
+       .event_idx      = perf_swevent_event_idx,
 };
 
 /*
@@ -5403,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event)
        if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
                return -ENOENT;
 
+       /*
+        * no branch sampling for software events
+        */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
        perf_swevent_init_hrtimer(event);
 
        return 0;
@@ -5417,6 +5570,8 @@ static struct pmu perf_task_clock = {
        .start          = task_clock_event_start,
        .stop           = task_clock_event_stop,
        .read           = task_clock_event_read,
+
+       .event_idx      = perf_swevent_event_idx,
 };
 
 static void perf_pmu_nop_void(struct pmu *pmu)
@@ -5444,6 +5599,11 @@ static void perf_pmu_cancel_txn(struct pmu *pmu)
        perf_pmu_enable(pmu);
 }
 
+static int perf_event_idx_default(struct perf_event *event)
+{
+       return event->hw.idx + 1;
+}
+
 /*
  * Ensures all contexts with the same task_ctx_nr have the same
  * pmu_cpu_context too.
@@ -5530,6 +5690,7 @@ static int pmu_dev_alloc(struct pmu *pmu)
        if (!pmu->dev)
                goto out;
 
+       pmu->dev->groups = pmu->attr_groups;
        device_initialize(pmu->dev);
        ret = dev_set_name(pmu->dev, "%s", pmu->name);
        if (ret)
@@ -5633,6 +5794,9 @@ got_cpu_context:
                pmu->pmu_disable = perf_pmu_nop_void;
        }
 
+       if (!pmu->event_idx)
+               pmu->event_idx = perf_event_idx_default;
+
        list_add_rcu(&pmu->entry, &pmus);
        ret = 0;
 unlock:
@@ -5825,7 +5989,7 @@ done:
 
        if (!event->parent) {
                if (event->attach_state & PERF_ATTACH_TASK)
-                       jump_label_inc(&perf_sched_events.key);
+                       static_key_slow_inc(&perf_sched_events.key);
                if (event->attr.mmap || event->attr.mmap_data)
                        atomic_inc(&nr_mmap_events);
                if (event->attr.comm)
@@ -5839,6 +6003,12 @@ done:
                                return ERR_PTR(err);
                        }
                }
+               if (has_branch_stack(event)) {
+                       static_key_slow_inc(&perf_sched_events.key);
+                       if (!(event->attach_state & PERF_ATTACH_TASK))
+                               atomic_inc(&per_cpu(perf_branch_stack_events,
+                                                   event->cpu));
+               }
        }
 
        return event;
@@ -5908,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
        if (attr->read_format & ~(PERF_FORMAT_MAX-1))
                return -EINVAL;
 
+       if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
+               u64 mask = attr->branch_sample_type;
+
+               /* only using defined bits */
+               if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
+                       return -EINVAL;
+
+               /* at least one branch bit must be set */
+               if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
+                       return -EINVAL;
+
+               /* kernel level capture: check permissions */
+               if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
+                   && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+                       return -EACCES;
+
+               /* propagate priv level, when not set for branch */
+               if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
+
+                       /* exclude_kernel checked on syscall entry */
+                       if (!attr->exclude_kernel)
+                               mask |= PERF_SAMPLE_BRANCH_KERNEL;
+
+                       if (!attr->exclude_user)
+                               mask |= PERF_SAMPLE_BRANCH_USER;
+
+                       if (!attr->exclude_hv)
+                               mask |= PERF_SAMPLE_BRANCH_HV;
+                       /*
+                        * adjust user setting (for HW filter setup)
+                        */
+                       attr->branch_sample_type = mask;
+               }
+       }
 out:
        return ret;
 
@@ -6063,7 +6267,7 @@ SYSCALL_DEFINE5(perf_event_open,
                 * - that may need work on context switch
                 */
                atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
-               jump_label_inc(&perf_sched_events.key);
+               static_key_slow_inc(&perf_sched_events.key);
        }
 
        /*
index ee706ce..bb38c4d 100644 (file)
@@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp)
        if (bp->attr.type != PERF_TYPE_BREAKPOINT)
                return -ENOENT;
 
+       /*
+        * no branch sampling for breakpoint events
+        */
+       if (has_branch_stack(bp))
+               return -EOPNOTSUPP;
+
        err = register_perf_hw_breakpoint(bp);
        if (err)
                return err;
@@ -613,6 +619,11 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags)
        bp->hw.state = PERF_HES_STOPPED;
 }
 
+static int hw_breakpoint_event_idx(struct perf_event *bp)
+{
+       return 0;
+}
+
 static struct pmu perf_breakpoint = {
        .task_ctx_nr    = perf_sw_context, /* could eventually get its own */
 
@@ -622,6 +633,8 @@ static struct pmu perf_breakpoint = {
        .start          = hw_breakpoint_start,
        .stop           = hw_breakpoint_stop,
        .read           = hw_breakpoint_pmu_read,
+
+       .event_idx      = hw_breakpoint_event_idx,
 };
 
 int __init init_hw_breakpoint(void)
index 4b4042f..752d2c0 100644 (file)
@@ -935,8 +935,6 @@ void do_exit(long code)
                schedule();
        }
 
-       exit_irq_thread();
-
        exit_signals(tsk);  /* sets PF_EXITING */
        /*
         * tsk->flags are checked in the futex code to protect against
@@ -945,6 +943,8 @@ void do_exit(long code)
        smp_mb();
        raw_spin_unlock_wait(&tsk->pi_lock);
 
+       exit_irq_thread();
+
        if (unlikely(in_atomic()))
                printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
                                current->comm, task_pid_nr(current),
index e2cd3e2..26a7a67 100644 (file)
@@ -668,6 +668,38 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
        return mm;
 }
 
+static void complete_vfork_done(struct task_struct *tsk)
+{
+       struct completion *vfork;
+
+       task_lock(tsk);
+       vfork = tsk->vfork_done;
+       if (likely(vfork)) {
+               tsk->vfork_done = NULL;
+               complete(vfork);
+       }
+       task_unlock(tsk);
+}
+
+static int wait_for_vfork_done(struct task_struct *child,
+                               struct completion *vfork)
+{
+       int killed;
+
+       freezer_do_not_count();
+       killed = wait_for_completion_killable(vfork);
+       freezer_count();
+
+       if (killed) {
+               task_lock(child);
+               child->vfork_done = NULL;
+               task_unlock(child);
+       }
+
+       put_task_struct(child);
+       return killed;
+}
+
 /* Please note the differences between mmput and mm_release.
  * mmput is called whenever we stop holding onto a mm_struct,
  * error success whatever.
@@ -683,8 +715,6 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
  */
 void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 {
-       struct completion *vfork_done = tsk->vfork_done;
-
        /* Get rid of any futexes when releasing the mm */
 #ifdef CONFIG_FUTEX
        if (unlikely(tsk->robust_list)) {
@@ -704,17 +734,15 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
        /* Get rid of any cached register state */
        deactivate_mm(tsk, mm);
 
-       /* notify parent sleeping on vfork() */
-       if (vfork_done) {
-               tsk->vfork_done = NULL;
-               complete(vfork_done);
-       }
+       if (tsk->vfork_done)
+               complete_vfork_done(tsk);
 
        /*
         * If we're exiting normally, clear a user-space tid field if
         * requested.  We leave this alone when dying by signal, to leave
         * the value intact in a core dump, and to save the unnecessary
-        * trouble otherwise.  Userland only wants this done for a sys_exit.
+        * trouble; e.g. a killed vfork parent shouldn't touch this mm.
+        * Userland only wants this done for a sys_exit.
         */
        if (tsk->clear_child_tid) {
                if (!(tsk->flags & PF_SIGNALED) &&
@@ -1018,7 +1046,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 
        new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
        new_flags |= PF_FORKNOEXEC;
-       new_flags |= PF_STARTING;
        p->flags = new_flags;
 }
 
@@ -1548,16 +1575,9 @@ long do_fork(unsigned long clone_flags,
                if (clone_flags & CLONE_VFORK) {
                        p->vfork_done = &vfork;
                        init_completion(&vfork);
+                       get_task_struct(p);
                }
 
-               /*
-                * We set PF_STARTING at creation in case tracing wants to
-                * use this to distinguish a fully live task from one that
-                * hasn't finished SIGSTOP raising yet.  Now we clear it
-                * and set the child going.
-                */
-               p->flags &= ~PF_STARTING;
-
                wake_up_new_task(p);
 
                /* forking complete and child started to run, tell ptracer */
@@ -1565,10 +1585,8 @@ long do_fork(unsigned long clone_flags,
                        ptrace_event(trace, nr);
 
                if (clone_flags & CLONE_VFORK) {
-                       freezer_do_not_count();
-                       wait_for_completion(&vfork);
-                       freezer_count();
-                       ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
+                       if (!wait_for_vfork_done(p, &vfork))
+                               ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
                }
        } else {
                nr = PTR_ERR(p);
index 1614be2..72efa1e 100644 (file)
@@ -2628,7 +2628,7 @@ void exit_robust_list(struct task_struct *curr)
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                u32 __user *uaddr2, u32 val2, u32 val3)
 {
-       int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK;
+       int cmd = op & FUTEX_CMD_MASK;
        unsigned int flags = 0;
 
        if (!(op & FUTEX_PRIVATE_FLAG))
@@ -2641,49 +2641,44 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
        }
 
        switch (cmd) {
+       case FUTEX_LOCK_PI:
+       case FUTEX_UNLOCK_PI:
+       case FUTEX_TRYLOCK_PI:
+       case FUTEX_WAIT_REQUEUE_PI:
+       case FUTEX_CMP_REQUEUE_PI:
+               if (!futex_cmpxchg_enabled)
+                       return -ENOSYS;
+       }
+
+       switch (cmd) {
        case FUTEX_WAIT:
                val3 = FUTEX_BITSET_MATCH_ANY;
        case FUTEX_WAIT_BITSET:
-               ret = futex_wait(uaddr, flags, val, timeout, val3);
-               break;
+               return futex_wait(uaddr, flags, val, timeout, val3);
        case FUTEX_WAKE:
                val3 = FUTEX_BITSET_MATCH_ANY;
        case FUTEX_WAKE_BITSET:
-               ret = futex_wake(uaddr, flags, val, val3);
-               break;
+               return futex_wake(uaddr, flags, val, val3);
        case FUTEX_REQUEUE:
-               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
-               break;
+               return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
        case FUTEX_CMP_REQUEUE:
-               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
-               break;
+               return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
        case FUTEX_WAKE_OP:
-               ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
-               break;
+               return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
        case FUTEX_LOCK_PI:
-               if (futex_cmpxchg_enabled)
-                       ret = futex_lock_pi(uaddr, flags, val, timeout, 0);
-               break;
+               return futex_lock_pi(uaddr, flags, val, timeout, 0);
        case FUTEX_UNLOCK_PI:
-               if (futex_cmpxchg_enabled)
-                       ret = futex_unlock_pi(uaddr, flags);
-               break;
+               return futex_unlock_pi(uaddr, flags);
        case FUTEX_TRYLOCK_PI:
-               if (futex_cmpxchg_enabled)
-                       ret = futex_lock_pi(uaddr, flags, 0, timeout, 1);
-               break;
+               return futex_lock_pi(uaddr, flags, 0, timeout, 1);
        case FUTEX_WAIT_REQUEUE_PI:
                val3 = FUTEX_BITSET_MATCH_ANY;
-               ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
-                                           uaddr2);
-               break;
+               return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
+                                            uaddr2);
        case FUTEX_CMP_REQUEUE_PI:
-               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
-               break;
-       default:
-               ret = -ENOSYS;
+               return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
        }
-       return ret;
+       return -ENOSYS;
 }
 
 
index 2e48ec0..c21449f 100644 (file)
@@ -119,15 +119,20 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
  * For preemptible RCU it is sufficient to call rcu_read_unlock in order
  * to exit the grace period. For classic RCU, a reschedule is required.
  */
-static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
+static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
 {
+       bool can_cont;
+
        get_task_struct(g);
        get_task_struct(t);
        rcu_read_unlock();
        cond_resched();
        rcu_read_lock();
+       can_cont = pid_alive(g) && pid_alive(t);
        put_task_struct(t);
        put_task_struct(g);
+
+       return can_cont;
 }
 
 /*
@@ -154,9 +159,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
                        goto unlock;
                if (!--batch_count) {
                        batch_count = HUNG_TASK_BATCHING;
-                       rcu_lock_break(g, t);
-                       /* Exit if t or g was unhashed during refresh. */
-                       if (t->state == TASK_DEAD || g->state == TASK_DEAD)
+                       if (!rcu_lock_break(g, t))
                                goto unlock;
                }
                /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
index fb7db75..6080f6b 100644 (file)
@@ -16,6 +16,8 @@
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 
+#include <trace/events/irq.h>
+
 #include "internals.h"
 
 /**
@@ -61,8 +63,7 @@ int irq_set_irq_type(unsigned int irq, unsigned int type)
                return -EINVAL;
 
        type &= IRQ_TYPE_SENSE_MASK;
-       if (type != IRQ_TYPE_NONE)
-               ret = __irq_set_trigger(desc, irq, type);
+       ret = __irq_set_trigger(desc, irq, type);
        irq_put_desc_busunlock(desc, flags);
        return ret;
 }
index 470d08c..6ff84e6 100644 (file)
@@ -60,7 +60,7 @@ static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
         * device interrupt, so no irq storm is lurking. If the
         * RUNTHREAD bit is already set, nothing to do.
         */
-       if (test_bit(IRQTF_DIED, &action->thread_flags) ||
+       if ((action->thread->flags & PF_EXITING) ||
            test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
                return;
 
@@ -110,6 +110,18 @@ static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
         * threads_oneshot untouched and runs the thread another time.
         */
        desc->threads_oneshot |= action->thread_mask;
+
+       /*
+        * We increment the threads_active counter in case we wake up
+        * the irq thread. The irq thread decrements the counter when
+        * it returns from the handler or in the exit path and wakes
+        * up waiters which are stuck in synchronize_irq() when the
+        * active count becomes zero. synchronize_irq() is serialized
+        * against this code (hard irq handler) via IRQS_INPROGRESS
+        * like the finalize_oneshot() code. See comment above.
+        */
+       atomic_inc(&desc->threads_active);
+
        wake_up_process(action->thread);
 }
 
index 40378ff..8e5c56b 100644 (file)
@@ -20,14 +20,12 @@ extern bool noirqdebug;
 /*
  * Bits used by threaded handlers:
  * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
- * IRQTF_DIED      - handler thread died
  * IRQTF_WARNED    - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
  * IRQTF_AFFINITY  - irq thread is requested to adjust affinity
  * IRQTF_FORCED_THREAD  - irq action is force threaded
  */
 enum {
        IRQTF_RUNTHREAD,
-       IRQTF_DIED,
        IRQTF_WARNED,
        IRQTF_AFFINITY,
        IRQTF_FORCED_THREAD,
index 32313c0..b0ccd1a 100644 (file)
@@ -759,6 +759,13 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc,
        return ret;
 }
 
+static void wake_threads_waitq(struct irq_desc *desc)
+{
+       if (atomic_dec_and_test(&desc->threads_active) &&
+           waitqueue_active(&desc->wait_for_threads))
+               wake_up(&desc->wait_for_threads);
+}
+
 /*
  * Interrupt handler thread
  */
@@ -771,57 +778,41 @@ static int irq_thread(void *data)
        struct irq_desc *desc = irq_to_desc(action->irq);
        irqreturn_t (*handler_fn)(struct irq_desc *desc,
                        struct irqaction *action);
-       int wake;
 
-       if (force_irqthreads & test_bit(IRQTF_FORCED_THREAD,
+       if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD,
                                        &action->thread_flags))
                handler_fn = irq_forced_thread_fn;
        else
                handler_fn = irq_thread_fn;
 
        sched_setscheduler(current, SCHED_FIFO, &param);
-       current->irqaction = action;
+       current->irq_thread = 1;
 
        while (!irq_wait_for_interrupt(action)) {
+               irqreturn_t action_ret;
 
                irq_thread_check_affinity(desc, action);
 
-               atomic_inc(&desc->threads_active);
-
-               raw_spin_lock_irq(&desc->lock);
-               if (unlikely(irqd_irq_disabled(&desc->irq_data))) {
-                       /*
-                        * CHECKME: We might need a dedicated
-                        * IRQ_THREAD_PENDING flag here, which
-                        * retriggers the thread in check_irq_resend()
-                        * but AFAICT IRQS_PENDING should be fine as it
-                        * retriggers the interrupt itself --- tglx
-                        */
-                       desc->istate |= IRQS_PENDING;
-                       raw_spin_unlock_irq(&desc->lock);
-               } else {
-                       irqreturn_t action_ret;
-
-                       raw_spin_unlock_irq(&desc->lock);
-                       action_ret = handler_fn(desc, action);
-                       if (!noirqdebug)
-                               note_interrupt(action->irq, desc, action_ret);
-               }
+               action_ret = handler_fn(desc, action);
+               if (!noirqdebug)
+                       note_interrupt(action->irq, desc, action_ret);
 
-               wake = atomic_dec_and_test(&desc->threads_active);
-
-               if (wake && waitqueue_active(&desc->wait_for_threads))
-                       wake_up(&desc->wait_for_threads);
+               wake_threads_waitq(desc);
        }
 
-       /* Prevent a stale desc->threads_oneshot */
-       irq_finalize_oneshot(desc, action, true);
-
        /*
-        * Clear irqaction. Otherwise exit_irq_thread() would make
+        * This is the regular exit path. __free_irq() is stopping the
+        * thread via kthread_stop() after calling
+        * synchronize_irq(). So neither IRQTF_RUNTHREAD nor the
+        * oneshot mask bit can be set. We cannot verify that as we
+        * cannot touch the oneshot mask at this point anymore as
+        * __setup_irq() might have given out current's thread_mask
+        * again.
+        *
+        * Clear irq_thread. Otherwise exit_irq_thread() would make
         * fuzz about an active irq thread going into nirvana.
         */
-       current->irqaction = NULL;
+       current->irq_thread = 0;
        return 0;
 }
 
@@ -832,27 +823,28 @@ void exit_irq_thread(void)
 {
        struct task_struct *tsk = current;
        struct irq_desc *desc;
+       struct irqaction *action;
 
-       if (!tsk->irqaction)
+       if (!tsk->irq_thread)
                return;
 
+       action = kthread_data(tsk);
+
        printk(KERN_ERR
               "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
-              tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
+              tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
 
-       desc = irq_to_desc(tsk->irqaction->irq);
+       desc = irq_to_desc(action->irq);
 
        /*
-        * Prevent a stale desc->threads_oneshot. Must be called
-        * before setting the IRQTF_DIED flag.
+        * If IRQTF_RUNTHREAD is set, we need to decrement
+        * desc->threads_active and wake possible waiters.
         */
-       irq_finalize_oneshot(desc, tsk->irqaction, true);
+       if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+               wake_threads_waitq(desc);
 
-       /*
-        * Set the THREAD DIED flag to prevent further wakeups of the
-        * soon to be gone threaded handler.
-        */
-       set_bit(IRQTF_DIED, &tsk->irqaction->flags);
+       /* Prevent a stale desc->threads_oneshot */
+       irq_finalize_oneshot(desc, action, true);
 }
 
 static void irq_setup_forced_threading(struct irqaction *new)
@@ -985,6 +977,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
                /* add new interrupt at end of irq queue */
                do {
+                       /*
+                        * Or all existing action->thread_mask bits,
+                        * so we can find the next zero bit for this
+                        * new action.
+                        */
                        thread_mask |= old->thread_mask;
                        old_ptr = &old->next;
                        old = *old_ptr;
@@ -993,14 +990,41 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
        }
 
        /*
-        * Setup the thread mask for this irqaction. Unlikely to have
-        * 32 resp 64 irqs sharing one line, but who knows.
+        * Setup the thread mask for this irqaction for ONESHOT. For
+        * !ONESHOT irqs the thread mask is 0 so we can avoid a
+        * conditional in irq_wake_thread().
         */
-       if (new->flags & IRQF_ONESHOT && thread_mask == ~0UL) {
-               ret = -EBUSY;
-               goto out_mask;
+       if (new->flags & IRQF_ONESHOT) {
+               /*
+                * Unlikely to have 32 resp 64 irqs sharing one line,
+                * but who knows.
+                */
+               if (thread_mask == ~0UL) {
+                       ret = -EBUSY;
+                       goto out_mask;
+               }
+               /*
+                * The thread_mask for the action is or'ed to
+                * desc->threads_oneshot to indicate that the
+                * IRQF_ONESHOT thread handler has been woken, but not
+                * yet finished. The bit is cleared when a thread
+                * completes. When all threads of a shared interrupt
+                * line have completed desc->threads_oneshot becomes
+                * zero and the interrupt line is unmasked. See
+                * handle.c:irq_wake_thread() for further information.
+                *
+                * If no thread is woken by primary (hard irq context)
+                * interrupt handlers, then desc->threads_oneshot is
+                * also checked for zero to unmask the irq line in the
+                * affected hard irq flow handlers
+                * (handle_[fasteoi|level]_irq).
+                *
+                * The new action gets the first zero bit of
+                * thread_mask (see the or-loop above). Shift 1UL, as
+                * an int 1 overflows for bit 31 and above.
+                */
+               new->thread_mask = 1UL << ffz(thread_mask);
+       }
-       new->thread_mask = 1 << ffz(thread_mask);
 
        if (!shared) {
                init_waitqueue_head(&desc->wait_for_threads);
@@ -1103,8 +1127,7 @@ out_thread:
                struct task_struct *t = new->thread;
 
                new->thread = NULL;
-               if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
-                       kthread_stop(t);
+               kthread_stop(t);
                put_task_struct(t);
        }
 out_mput:
@@ -1214,8 +1237,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 #endif
 
        if (action->thread) {
-               if (!test_bit(IRQTF_DIED, &action->thread_flags))
-                       kthread_stop(action->thread);
+               kthread_stop(action->thread);
                put_task_struct(action->thread);
        }
 
index 01d3b70..4304919 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/slab.h>
 #include <linux/sort.h>
 #include <linux/err.h>
-#include <linux/jump_label.h>
+#include <linux/static_key.h>
 
 #ifdef HAVE_JUMP_LABEL
 
@@ -29,11 +29,6 @@ void jump_label_unlock(void)
        mutex_unlock(&jump_label_mutex);
 }
 
-bool jump_label_enabled(struct jump_label_key *key)
-{
-       return !!atomic_read(&key->enabled);
-}
-
 static int jump_label_cmp(const void *a, const void *b)
 {
        const struct jump_entry *jea = a;
@@ -58,56 +53,66 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
        sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL);
 }
 
-static void jump_label_update(struct jump_label_key *key, int enable);
+static void jump_label_update(struct static_key *key, int enable);
 
-void jump_label_inc(struct jump_label_key *key)
+void static_key_slow_inc(struct static_key *key)
 {
        if (atomic_inc_not_zero(&key->enabled))
                return;
 
        jump_label_lock();
-       if (atomic_read(&key->enabled) == 0)
-               jump_label_update(key, JUMP_LABEL_ENABLE);
+       if (atomic_read(&key->enabled) == 0) {
+               if (!jump_label_get_branch_default(key))
+                       jump_label_update(key, JUMP_LABEL_ENABLE);
+               else
+                       jump_label_update(key, JUMP_LABEL_DISABLE);
+       }
        atomic_inc(&key->enabled);
        jump_label_unlock();
 }
-EXPORT_SYMBOL_GPL(jump_label_inc);
+EXPORT_SYMBOL_GPL(static_key_slow_inc);
 
-static void __jump_label_dec(struct jump_label_key *key,
+static void __static_key_slow_dec(struct static_key *key,
                unsigned long rate_limit, struct delayed_work *work)
 {
-       if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex))
+       if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
+               WARN(atomic_read(&key->enabled) < 0,
+                    "jump label: negative count!\n");
                return;
+       }
 
        if (rate_limit) {
                atomic_inc(&key->enabled);
                schedule_delayed_work(work, rate_limit);
-       } else
-               jump_label_update(key, JUMP_LABEL_DISABLE);
-
+       } else {
+               if (!jump_label_get_branch_default(key))
+                       jump_label_update(key, JUMP_LABEL_DISABLE);
+               else
+                       jump_label_update(key, JUMP_LABEL_ENABLE);
+       }
        jump_label_unlock();
 }
-EXPORT_SYMBOL_GPL(jump_label_dec);
 
 static void jump_label_update_timeout(struct work_struct *work)
 {
-       struct jump_label_key_deferred *key =
-               container_of(work, struct jump_label_key_deferred, work.work);
-       __jump_label_dec(&key->key, 0, NULL);
+       struct static_key_deferred *key =
+               container_of(work, struct static_key_deferred, work.work);
+       __static_key_slow_dec(&key->key, 0, NULL);
 }
 
-void jump_label_dec(struct jump_label_key *key)
+void static_key_slow_dec(struct static_key *key)
 {
-       __jump_label_dec(key, 0, NULL);
+       __static_key_slow_dec(key, 0, NULL);
 }
+EXPORT_SYMBOL_GPL(static_key_slow_dec);
 
-void jump_label_dec_deferred(struct jump_label_key_deferred *key)
+void static_key_slow_dec_deferred(struct static_key_deferred *key)
 {
-       __jump_label_dec(&key->key, key->timeout, &key->work);
+       __static_key_slow_dec(&key->key, key->timeout, &key->work);
 }
+EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
 
-
-void jump_label_rate_limit(struct jump_label_key_deferred *key,
+void jump_label_rate_limit(struct static_key_deferred *key,
                unsigned long rl)
 {
        key->timeout = rl;
@@ -150,7 +155,7 @@ void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry
        arch_jump_label_transform(entry, type); 
 }
 
-static void __jump_label_update(struct jump_label_key *key,
+static void __jump_label_update(struct static_key *key,
                                struct jump_entry *entry,
                                struct jump_entry *stop, int enable)
 {
@@ -167,27 +172,40 @@ static void __jump_label_update(struct jump_label_key *key,
        }
 }
 
+static enum jump_label_type jump_label_type(struct static_key *key)
+{
+       bool true_branch = jump_label_get_branch_default(key);
+       bool state = static_key_enabled(key);
+
+       if ((!true_branch && state) || (true_branch && !state))
+               return JUMP_LABEL_ENABLE;
+
+       return JUMP_LABEL_DISABLE;
+}
+
 void __init jump_label_init(void)
 {
        struct jump_entry *iter_start = __start___jump_table;
        struct jump_entry *iter_stop = __stop___jump_table;
-       struct jump_label_key *key = NULL;
+       struct static_key *key = NULL;
        struct jump_entry *iter;
 
        jump_label_lock();
        jump_label_sort_entries(iter_start, iter_stop);
 
        for (iter = iter_start; iter < iter_stop; iter++) {
-               struct jump_label_key *iterk;
+               struct static_key *iterk;
 
-               iterk = (struct jump_label_key *)(unsigned long)iter->key;
-               arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ?
-                                                JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
+               iterk = (struct static_key *)(unsigned long)iter->key;
+               arch_jump_label_transform_static(iter, jump_label_type(iterk));
                if (iterk == key)
                        continue;
 
                key = iterk;
-               key->entries = iter;
+               /*
+                * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
+                */
+               *((unsigned long *)&key->entries) += (unsigned long)iter;
 #ifdef CONFIG_MODULES
                key->next = NULL;
 #endif
@@ -197,8 +215,8 @@ void __init jump_label_init(void)
 
 #ifdef CONFIG_MODULES
 
-struct jump_label_mod {
-       struct jump_label_mod *next;
+struct static_key_mod {
+       struct static_key_mod *next;
        struct jump_entry *entries;
        struct module *mod;
 };
@@ -218,9 +236,9 @@ static int __jump_label_mod_text_reserved(void *start, void *end)
                                start, end);
 }
 
-static void __jump_label_mod_update(struct jump_label_key *key, int enable)
+static void __jump_label_mod_update(struct static_key *key, int enable)
 {
-       struct jump_label_mod *mod = key->next;
+       struct static_key_mod *mod = key->next;
 
        while (mod) {
                struct module *m = mod->mod;
@@ -251,11 +269,7 @@ void jump_label_apply_nops(struct module *mod)
                return;
 
        for (iter = iter_start; iter < iter_stop; iter++) {
-               struct jump_label_key *iterk;
-
-               iterk = (struct jump_label_key *)(unsigned long)iter->key;
-               arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ?
-                               JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
+               arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE);
        }
 }
 
@@ -264,8 +278,8 @@ static int jump_label_add_module(struct module *mod)
        struct jump_entry *iter_start = mod->jump_entries;
        struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
        struct jump_entry *iter;
-       struct jump_label_key *key = NULL;
-       struct jump_label_mod *jlm;
+       struct static_key *key = NULL;
+       struct static_key_mod *jlm;
 
        /* if the module doesn't have jump label entries, just return */
        if (iter_start == iter_stop)
@@ -274,28 +288,30 @@ static int jump_label_add_module(struct module *mod)
        jump_label_sort_entries(iter_start, iter_stop);
 
        for (iter = iter_start; iter < iter_stop; iter++) {
-               if (iter->key == (jump_label_t)(unsigned long)key)
-                       continue;
+               struct static_key *iterk;
 
-               key = (struct jump_label_key *)(unsigned long)iter->key;
+               iterk = (struct static_key *)(unsigned long)iter->key;
+               if (iterk == key)
+                       continue;
 
+               key = iterk;
                if (__module_address(iter->key) == mod) {
-                       atomic_set(&key->enabled, 0);
-                       key->entries = iter;
+                       /*
+                        * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
+                        */
+                       *((unsigned long *)&key->entries) += (unsigned long)iter;
                        key->next = NULL;
                        continue;
                }
-
-               jlm = kzalloc(sizeof(struct jump_label_mod), GFP_KERNEL);
+               jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL);
                if (!jlm)
                        return -ENOMEM;
-
                jlm->mod = mod;
                jlm->entries = iter;
                jlm->next = key->next;
                key->next = jlm;
 
-               if (jump_label_enabled(key))
+               if (jump_label_type(key) == JUMP_LABEL_ENABLE)
                        __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE);
        }
 
@@ -307,14 +323,14 @@ static void jump_label_del_module(struct module *mod)
        struct jump_entry *iter_start = mod->jump_entries;
        struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
        struct jump_entry *iter;
-       struct jump_label_key *key = NULL;
-       struct jump_label_mod *jlm, **prev;
+       struct static_key *key = NULL;
+       struct static_key_mod *jlm, **prev;
 
        for (iter = iter_start; iter < iter_stop; iter++) {
                if (iter->key == (jump_label_t)(unsigned long)key)
                        continue;
 
-               key = (struct jump_label_key *)(unsigned long)iter->key;
+               key = (struct static_key *)(unsigned long)iter->key;
 
                if (__module_address(iter->key) == mod)
                        continue;
@@ -416,12 +432,13 @@ int jump_label_text_reserved(void *start, void *end)
        return ret;
 }
 
-static void jump_label_update(struct jump_label_key *key, int enable)
+static void jump_label_update(struct static_key *key, int enable)
 {
-       struct jump_entry *entry = key->entries, *stop = __stop___jump_table;
+       struct jump_entry *stop = __stop___jump_table;
+       struct jump_entry *entry = jump_label_get_entries(key);
 
 #ifdef CONFIG_MODULES
-       struct module *mod = __module_address((jump_label_t)key);
+       struct module *mod = __module_address((unsigned long)key);
 
        __jump_label_mod_update(key, enable);
 
index 9788c0e..c62b854 100644 (file)
@@ -1334,8 +1334,10 @@ int __kprobes register_kprobe(struct kprobe *p)
        if (!kernel_text_address((unsigned long) p->addr) ||
            in_kprobes_functions((unsigned long) p->addr) ||
            ftrace_text_reserved(p->addr, p->addr) ||
-           jump_label_text_reserved(p->addr, p->addr))
-               goto fail_with_jump_label;
+           jump_label_text_reserved(p->addr, p->addr)) {
+               ret = -EINVAL;
+               goto cannot_probe;
+       }
 
        /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
        p->flags &= KPROBE_FLAG_DISABLED;
@@ -1352,7 +1354,7 @@ int __kprobes register_kprobe(struct kprobe *p)
                 * its code to prohibit unexpected unloading.
                 */
                if (unlikely(!try_module_get(probed_mod)))
-                       goto fail_with_jump_label;
+                       goto cannot_probe;
 
                /*
                 * If the module freed .init.text, we couldn't insert
@@ -1361,7 +1363,7 @@ int __kprobes register_kprobe(struct kprobe *p)
                if (within_module_init((unsigned long)p->addr, probed_mod) &&
                    probed_mod->state != MODULE_STATE_COMING) {
                        module_put(probed_mod);
-                       goto fail_with_jump_label;
+                       goto cannot_probe;
                }
                /* ret will be updated by following code */
        }
@@ -1409,7 +1411,7 @@ out:
 
        return ret;
 
-fail_with_jump_label:
+cannot_probe:
        preempt_enable();
        jump_label_unlock();
        return ret;
index 8889f7d..ea9ee45 100644 (file)
@@ -4176,7 +4176,13 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
        printk("-------------------------------\n");
        printk("%s:%d %s!\n", file, line, s);
        printk("\nother info that might help us debug this:\n\n");
-       printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks);
+       printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
+              !rcu_lockdep_current_cpu_online()
+                       ? "RCU used illegally from offline CPU!\n"
+                       : rcu_is_cpu_idle()
+                               ? "RCU used illegally from idle CPU!\n"
+                               : "",
+              rcu_scheduler_active, debug_locks);
 
        /*
         * If a CPU is in the RCU-free window in idle (ie: in the section
index 89096dd..a307cc9 100644 (file)
@@ -240,9 +240,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 
                /* didn't get the lock, go to sleep: */
                spin_unlock_mutex(&lock->wait_lock, flags);
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+               schedule_preempt_disabled();
                spin_lock_mutex(&lock->wait_lock, flags);
        }
 
index 13c0a11..b663c2c 100644 (file)
@@ -44,6 +44,9 @@
 
 #include <asm/uaccess.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/printk.h>
+
 /*
  * Architectures can override it:
  */
@@ -542,6 +545,8 @@ MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
 static void _call_console_drivers(unsigned start,
                                unsigned end, int msg_log_level)
 {
+       trace_console(&LOG_BUF(0), start, end, log_buf_len);
+
        if ((msg_log_level < console_loglevel || ignore_loglevel) &&
                        console_drivers && start != end) {
                if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
@@ -702,6 +707,9 @@ static bool printk_time = 0;
 #endif
 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
 
+static bool always_kmsg_dump;
+module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
+
 /* Check if we have any console registered that can be called early in boot. */
 static int have_callable_console(void)
 {
@@ -1208,13 +1216,27 @@ int is_console_locked(void)
        return console_locked;
 }
 
+/*
+ * Delayed printk facility, for scheduler-internal messages:
+ */
+#define PRINTK_BUF_SIZE                512
+
+#define PRINTK_PENDING_WAKEUP  0x01
+#define PRINTK_PENDING_SCHED   0x02
+
 static DEFINE_PER_CPU(int, printk_pending);
+static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
 
 void printk_tick(void)
 {
        if (__this_cpu_read(printk_pending)) {
-               __this_cpu_write(printk_pending, 0);
-               wake_up_interruptible(&log_wait);
+               int pending = __this_cpu_xchg(printk_pending, 0);
+               if (pending & PRINTK_PENDING_SCHED) {
+                       char *buf = __get_cpu_var(printk_sched_buf);
+                       printk(KERN_WARNING "[sched_delayed] %s", buf);
+               }
+               if (pending & PRINTK_PENDING_WAKEUP)
+                       wake_up_interruptible(&log_wait);
        }
 }
 
@@ -1228,7 +1250,7 @@ int printk_needs_cpu(int cpu)
 void wake_up_klogd(void)
 {
        if (waitqueue_active(&log_wait))
-               this_cpu_write(printk_pending, 1);
+               this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
 }
 
 /**
@@ -1621,6 +1643,26 @@ late_initcall(printk_late_init);
 
 #if defined CONFIG_PRINTK
 
+int printk_sched(const char *fmt, ...)
+{
+       unsigned long flags;
+       va_list args;
+       char *buf;
+       int r;
+
+       local_irq_save(flags);
+       buf = __get_cpu_var(printk_sched_buf);
+
+       va_start(args, fmt);
+       r = vsnprintf(buf, PRINTK_BUF_SIZE, fmt, args);
+       va_end(args);
+
+       __this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
+       local_irq_restore(flags);
+
+       return r;
+}
+
 /*
  * printk rate limiting, lifted from the networking subsystem.
  *
@@ -1732,6 +1774,9 @@ void kmsg_dump(enum kmsg_dump_reason reason)
        unsigned long l1, l2;
        unsigned long flags;
 
+       if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
+               return;
+
        /* Theoretically, the log could move on after we do this, but
           there's not a lot we can do about that. The new messages
           will overwrite the start of what we dump. */
index aa88baa..8ba99cd 100644 (file)
  * Process-level increment to ->dynticks_nesting field.  This allows for
  * architectures that use half-interrupts and half-exceptions from
  * process context.
+ *
+ * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH
+ * that counts the number of process-based reasons why RCU cannot
+ * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE
+ * is the value used to increment or decrement this field.
+ *
+ * The rest of the bits could in principle be used to count interrupts,
+ * but this would mean that a negative-one value in the interrupt
+ * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field.
+ * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK
+ * that is set to DYNTICK_TASK_FLAG upon initial exit from idle.
+ * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon
+ * initial exit from idle.
  */
-#define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1)
+#define DYNTICK_TASK_NEST_WIDTH 7
+#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1)
+#define DYNTICK_TASK_NEST_MASK  (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1)
+#define DYNTICK_TASK_FLAG         ((DYNTICK_TASK_NEST_VALUE / 8) * 2)
+#define DYNTICK_TASK_MASK         ((DYNTICK_TASK_NEST_VALUE / 8) * 3)
+#define DYNTICK_TASK_EXIT_IDLE    (DYNTICK_TASK_NEST_VALUE + \
+                                   DYNTICK_TASK_FLAG)
 
 /*
  * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
@@ -50,7 +69,6 @@ extern struct debug_obj_descr rcuhead_debug_descr;
 
 static inline void debug_rcu_head_queue(struct rcu_head *head)
 {
-       WARN_ON_ONCE((unsigned long)head & 0x3);
        debug_object_activate(head, &rcuhead_debug_descr);
        debug_object_active_state(head, &rcuhead_debug_descr,
                                  STATE_RCU_HEAD_READY,
@@ -76,16 +94,18 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
 
 extern void kfree(const void *);
 
-static inline void __rcu_reclaim(char *rn, struct rcu_head *head)
+static inline bool __rcu_reclaim(char *rn, struct rcu_head *head)
 {
        unsigned long offset = (unsigned long)head->func;
 
        if (__is_kfree_rcu_offset(offset)) {
                RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
                kfree((void *)head - offset);
+               return 1;
        } else {
                RCU_TRACE(trace_rcu_invoke_callback(rn, head));
                head->func(head);
+               return 0;
        }
 }
 
index 2bc4e13..a86f174 100644 (file)
@@ -88,6 +88,9 @@ EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
  * section.
  *
  * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
+ *
+ * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * offline from an RCU perspective, so check for those as well.
  */
 int rcu_read_lock_bh_held(void)
 {
@@ -95,6 +98,8 @@ int rcu_read_lock_bh_held(void)
                return 1;
        if (rcu_is_cpu_idle())
                return 0;
+       if (!rcu_lockdep_current_cpu_online())
+               return 0;
        return in_softirq() || irqs_disabled();
 }
 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
index 977296d..37a5444 100644 (file)
@@ -53,7 +53,7 @@ static void __call_rcu(struct rcu_head *head,
 
 #include "rcutiny_plugin.h"
 
-static long long rcu_dynticks_nesting = DYNTICK_TASK_NESTING;
+static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 
 /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
 static void rcu_idle_enter_common(long long oldval)
@@ -88,10 +88,16 @@ void rcu_idle_enter(void)
 
        local_irq_save(flags);
        oldval = rcu_dynticks_nesting;
-       rcu_dynticks_nesting = 0;
+       WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
+       if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) ==
+           DYNTICK_TASK_NEST_VALUE)
+               rcu_dynticks_nesting = 0;
+       else
+               rcu_dynticks_nesting  -= DYNTICK_TASK_NEST_VALUE;
        rcu_idle_enter_common(oldval);
        local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
 /*
  * Exit an interrupt handler towards idle.
@@ -140,11 +146,15 @@ void rcu_idle_exit(void)
 
        local_irq_save(flags);
        oldval = rcu_dynticks_nesting;
-       WARN_ON_ONCE(oldval != 0);
-       rcu_dynticks_nesting = DYNTICK_TASK_NESTING;
+       WARN_ON_ONCE(rcu_dynticks_nesting < 0);
+       if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK)
+               rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
+       else
+               rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
        rcu_idle_exit_common(oldval);
        local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(rcu_idle_exit);
 
 /*
  * Enter an interrupt handler, moving away from idle.
@@ -258,7 +268,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 
        /* If no RCU callbacks ready to invoke, just return. */
        if (&rcp->rcucblist == rcp->donetail) {
-               RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
+               RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1));
                RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
                                              ACCESS_ONCE(rcp->rcucblist),
                                              need_resched(),
@@ -269,7 +279,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 
        /* Move the ready-to-invoke callbacks to a local list. */
        local_irq_save(flags);
-       RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
+       RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
        list = rcp->rcucblist;
        rcp->rcucblist = *rcp->donetail;
        *rcp->donetail = NULL;
@@ -319,6 +329,10 @@ static void rcu_process_callbacks(struct softirq_action *unused)
  */
 void synchronize_sched(void)
 {
+       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
+                          !lock_is_held(&rcu_lock_map) &&
+                          !lock_is_held(&rcu_sched_lock_map),
+                          "Illegal synchronize_sched() in RCU read-side critical section");
        cond_resched();
 }
 EXPORT_SYMBOL_GPL(synchronize_sched);
index 9cb1ae4..22ecea0 100644 (file)
@@ -132,6 +132,7 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
        RCU_TRACE(.rcb.name = "rcu_preempt")
 };
 
+static void rcu_read_unlock_special(struct task_struct *t);
 static int rcu_preempted_readers_exp(void);
 static void rcu_report_exp_done(void);
 
@@ -146,6 +147,16 @@ static int rcu_cpu_blocking_cur_gp(void)
 /*
  * Check for a running RCU reader.  Because there is only one CPU,
  * there can be but one running RCU reader at a time.  ;-)
+ *
+ * Returns zero if there are no running readers.  Returns a positive
+ * number if there is at least one reader within its RCU read-side
+ * critical section.  Returns a negative number if an outermost reader
+ * is in the midst of exiting from its RCU read-side critical section
+ *
+ * Returns zero if there are no running readers.  Returns a positive
+ * number if there is at least one reader within its RCU read-side
+ * critical section.  Returns a negative number if an outermost reader
+ * is in the midst of exiting from its RCU read-side critical section.
  */
 static int rcu_preempt_running_reader(void)
 {
@@ -307,7 +318,6 @@ static int rcu_boost(void)
        t = container_of(tb, struct task_struct, rcu_node_entry);
        rt_mutex_init_proxy_locked(&mtx, t);
        t->rcu_boost_mutex = &mtx;
-       t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
        raw_local_irq_restore(flags);
        rt_mutex_lock(&mtx);
        rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
@@ -475,7 +485,7 @@ void rcu_preempt_note_context_switch(void)
        unsigned long flags;
 
        local_irq_save(flags); /* must exclude scheduler_tick(). */
-       if (rcu_preempt_running_reader() &&
+       if (rcu_preempt_running_reader() > 0 &&
            (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 
                /* Possibly blocking in an RCU read-side critical section. */
@@ -494,6 +504,13 @@ void rcu_preempt_note_context_switch(void)
                list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
                if (rcu_cpu_blocking_cur_gp())
                        rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
+       } else if (rcu_preempt_running_reader() < 0 &&
+                  t->rcu_read_unlock_special) {
+               /*
+                * Complete exit from RCU read-side critical section on
+                * behalf of preempted instance of __rcu_read_unlock().
+                */
+               rcu_read_unlock_special(t);
        }
 
        /*
@@ -526,12 +543,15 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
  */
-static void rcu_read_unlock_special(struct task_struct *t)
+static noinline void rcu_read_unlock_special(struct task_struct *t)
 {
        int empty;
        int empty_exp;
        unsigned long flags;
        struct list_head *np;
+#ifdef CONFIG_RCU_BOOST
+       struct rt_mutex *rbmp = NULL;
+#endif /* #ifdef CONFIG_RCU_BOOST */
        int special;
 
        /*
@@ -552,7 +572,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
                rcu_preempt_cpu_qs();
 
        /* Hardware IRQ handlers cannot block. */
-       if (in_irq()) {
+       if (in_irq() || in_serving_softirq()) {
                local_irq_restore(flags);
                return;
        }
@@ -597,10 +617,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
        }
 #ifdef CONFIG_RCU_BOOST
        /* Unboost self if was boosted. */
-       if (special & RCU_READ_UNLOCK_BOOSTED) {
-               t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
-               rt_mutex_unlock(t->rcu_boost_mutex);
+       if (t->rcu_boost_mutex != NULL) {
+               rbmp = t->rcu_boost_mutex;
                t->rcu_boost_mutex = NULL;
+               rt_mutex_unlock(rbmp);
        }
 #endif /* #ifdef CONFIG_RCU_BOOST */
        local_irq_restore(flags);
@@ -618,13 +638,22 @@ void __rcu_read_unlock(void)
        struct task_struct *t = current;
 
        barrier();  /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
-       --t->rcu_read_lock_nesting;
-       barrier();  /* decrement before load of ->rcu_read_unlock_special */
-       if (t->rcu_read_lock_nesting == 0 &&
-           unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
-               rcu_read_unlock_special(t);
+       if (t->rcu_read_lock_nesting != 1)
+               --t->rcu_read_lock_nesting;
+       else {
+               t->rcu_read_lock_nesting = INT_MIN;
+               barrier();  /* assign before ->rcu_read_unlock_special load */
+               if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+                       rcu_read_unlock_special(t);
+               barrier();  /* ->rcu_read_unlock_special load before assign */
+               t->rcu_read_lock_nesting = 0;
+       }
 #ifdef CONFIG_PROVE_LOCKING
-       WARN_ON_ONCE(t->rcu_read_lock_nesting < 0);
+       {
+               int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
+
+               WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
+       }
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 }
 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
@@ -649,7 +678,7 @@ static void rcu_preempt_check_callbacks(void)
                invoke_rcu_callbacks();
        if (rcu_preempt_gp_in_progress() &&
            rcu_cpu_blocking_cur_gp() &&
-           rcu_preempt_running_reader())
+           rcu_preempt_running_reader() > 0)
                t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 }
 
@@ -706,6 +735,11 @@ EXPORT_SYMBOL_GPL(call_rcu);
  */
 void synchronize_rcu(void)
 {
+       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
+                          !lock_is_held(&rcu_lock_map) &&
+                          !lock_is_held(&rcu_sched_lock_map),
+                          "Illegal synchronize_rcu() in RCU read-side critical section");
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        if (!rcu_scheduler_active)
                return;
@@ -882,7 +916,8 @@ static void rcu_preempt_process_callbacks(void)
 static void invoke_rcu_callbacks(void)
 {
        have_rcu_kthread_work = 1;
-       wake_up(&rcu_kthread_wq);
+       if (rcu_kthread_task != NULL)
+               wake_up(&rcu_kthread_wq);
 }
 
 #ifdef CONFIG_RCU_TRACE
@@ -943,12 +978,16 @@ early_initcall(rcu_spawn_kthreads);
 
 #else /* #ifdef CONFIG_RCU_BOOST */
 
+/* Hold off callback invocation until early_initcall() time. */
+static int rcu_scheduler_fully_active __read_mostly;
+
 /*
  * Start up softirq processing of callbacks.
  */
 void invoke_rcu_callbacks(void)
 {
-       raise_softirq(RCU_SOFTIRQ);
+       if (rcu_scheduler_fully_active)
+               raise_softirq(RCU_SOFTIRQ);
 }
 
 #ifdef CONFIG_RCU_TRACE
@@ -963,10 +1002,14 @@ static bool rcu_is_callbacks_kthread(void)
 
 #endif /* #ifdef CONFIG_RCU_TRACE */
 
-void rcu_init(void)
+static int __init rcu_scheduler_really_started(void)
 {
+       rcu_scheduler_fully_active = 1;
        open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+       raise_softirq(RCU_SOFTIRQ);  /* Invoke any callbacks from early boot. */
+       return 0;
 }
+early_initcall(rcu_scheduler_really_started);
 
 #endif /* #else #ifdef CONFIG_RCU_BOOST */
 
index a58ac28..a89b381 100644 (file)
@@ -65,7 +65,10 @@ static int fqs_duration;     /* Duration of bursts (us), 0 to disable. */
 static int fqs_holdoff;                /* Hold time within burst (us). */
 static int fqs_stutter = 3;    /* Wait time between bursts (s). */
 static int onoff_interval;     /* Wait time between CPU hotplugs, 0=disable. */
+static int onoff_holdoff;      /* Seconds after boot before CPU hotplugs. */
 static int shutdown_secs;      /* Shutdown time (s).  <=0 for no shutdown. */
+static int stall_cpu;          /* CPU-stall duration (s).  0 for no stall. */
+static int stall_cpu_holdoff = 10; /* Time to wait until stall (s).  */
 static int test_boost = 1;     /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */
 static int test_boost_interval = 7; /* Interval between boost tests, seconds. */
 static int test_boost_duration = 4; /* Duration of each boost test, seconds. */
@@ -95,8 +98,14 @@ module_param(fqs_stutter, int, 0444);
 MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
 module_param(onoff_interval, int, 0444);
 MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
+module_param(onoff_holdoff, int, 0444);
+MODULE_PARM_DESC(onoff_holdoff, "Time after boot before CPU hotplugs (s)");
 module_param(shutdown_secs, int, 0444);
 MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), zero to disable.");
+module_param(stall_cpu, int, 0444);
+MODULE_PARM_DESC(stall_cpu, "Stall duration (s), zero to disable.");
+module_param(stall_cpu_holdoff, int, 0444);
+MODULE_PARM_DESC(stall_cpu_holdoff, "Time to wait before starting stall (s).");
 module_param(test_boost, int, 0444);
 MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
 module_param(test_boost_interval, int, 0444);
@@ -129,6 +138,7 @@ static struct task_struct *shutdown_task;
 #ifdef CONFIG_HOTPLUG_CPU
 static struct task_struct *onoff_task;
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
+static struct task_struct *stall_task;
 
 #define RCU_TORTURE_PIPE_LEN 10
 
@@ -990,12 +1000,12 @@ static void rcu_torture_timer(unsigned long unused)
                                  rcu_read_lock_bh_held() ||
                                  rcu_read_lock_sched_held() ||
                                  srcu_read_lock_held(&srcu_ctl));
-       do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
        if (p == NULL) {
                /* Leave because rcu_torture_writer is not yet underway */
                cur_ops->readunlock(idx);
                return;
        }
+       do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
        if (p->rtort_mbtest == 0)
                atomic_inc(&n_rcu_torture_mberror);
        spin_lock(&rand_lock);
@@ -1053,13 +1063,13 @@ rcu_torture_reader(void *arg)
                                          rcu_read_lock_bh_held() ||
                                          rcu_read_lock_sched_held() ||
                                          srcu_read_lock_held(&srcu_ctl));
-               do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
                if (p == NULL) {
                        /* Wait for rcu_torture_writer to get underway */
                        cur_ops->readunlock(idx);
                        schedule_timeout_interruptible(HZ);
                        continue;
                }
+               do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
                if (p->rtort_mbtest == 0)
                        atomic_inc(&n_rcu_torture_mberror);
                cur_ops->read_delay(&rand);
@@ -1300,13 +1310,13 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
                "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
                "test_boost=%d/%d test_boost_interval=%d "
                "test_boost_duration=%d shutdown_secs=%d "
-               "onoff_interval=%d\n",
+               "onoff_interval=%d onoff_holdoff=%d\n",
                torture_type, tag, nrealreaders, nfakewriters,
                stat_interval, verbose, test_no_idle_hz, shuffle_interval,
                stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
                test_boost, cur_ops->can_boost,
                test_boost_interval, test_boost_duration, shutdown_secs,
-               onoff_interval);
+               onoff_interval, onoff_holdoff);
 }
 
 static struct notifier_block rcutorture_shutdown_nb = {
@@ -1410,6 +1420,11 @@ rcu_torture_onoff(void *arg)
        for_each_online_cpu(cpu)
                maxcpu = cpu;
        WARN_ON(maxcpu < 0);
+       if (onoff_holdoff > 0) {
+               VERBOSE_PRINTK_STRING("rcu_torture_onoff begin holdoff");
+               schedule_timeout_interruptible(onoff_holdoff * HZ);
+               VERBOSE_PRINTK_STRING("rcu_torture_onoff end holdoff");
+       }
        while (!kthread_should_stop()) {
                cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1);
                if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) {
@@ -1450,12 +1465,15 @@ rcu_torture_onoff(void *arg)
 static int __cpuinit
 rcu_torture_onoff_init(void)
 {
+       int ret;
+
        if (onoff_interval <= 0)
                return 0;
        onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff");
        if (IS_ERR(onoff_task)) {
+               ret = PTR_ERR(onoff_task);
                onoff_task = NULL;
-               return PTR_ERR(onoff_task);
+               return ret;
        }
        return 0;
 }
@@ -1481,6 +1499,63 @@ static void rcu_torture_onoff_cleanup(void)
 
 #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
 
+/*
+ * CPU-stall kthread.  It waits as specified by stall_cpu_holdoff, then
+ * induces a CPU stall for the time specified by stall_cpu.
+ */
+static int __cpuinit rcu_torture_stall(void *args)
+{
+       unsigned long stop_at;
+
+       VERBOSE_PRINTK_STRING("rcu_torture_stall task started");
+       if (stall_cpu_holdoff > 0) {
+               VERBOSE_PRINTK_STRING("rcu_torture_stall begin holdoff");
+               schedule_timeout_interruptible(stall_cpu_holdoff * HZ);
+               VERBOSE_PRINTK_STRING("rcu_torture_stall end holdoff");
+       }
+       if (!kthread_should_stop()) {
+               stop_at = get_seconds() + stall_cpu;
+               /* RCU CPU stall is expected behavior in following code. */
+               printk(KERN_ALERT "rcu_torture_stall start.\n");
+               rcu_read_lock();
+               preempt_disable();
+               while (ULONG_CMP_LT(get_seconds(), stop_at))
+                       continue;  /* Induce RCU CPU stall warning. */
+               preempt_enable();
+               rcu_read_unlock();
+               printk(KERN_ALERT "rcu_torture_stall end.\n");
+       }
+       rcutorture_shutdown_absorb("rcu_torture_stall");
+       while (!kthread_should_stop())
+               schedule_timeout_interruptible(10 * HZ);
+       return 0;
+}
+
+/* Spawn CPU-stall kthread, if stall_cpu specified. */
+static int __init rcu_torture_stall_init(void)
+{
+       int ret;
+
+       if (stall_cpu <= 0)
+               return 0;
+       stall_task = kthread_run(rcu_torture_stall, NULL, "rcu_torture_stall");
+       if (IS_ERR(stall_task)) {
+               ret = PTR_ERR(stall_task);
+               stall_task = NULL;
+               return ret;
+       }
+       return 0;
+}
+
+/* Clean up after the CPU-stall kthread, if one was spawned. */
+static void rcu_torture_stall_cleanup(void)
+{
+       if (stall_task == NULL)
+               return;
+       VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task.");
+       kthread_stop(stall_task);
+}
+
 static int rcutorture_cpu_notify(struct notifier_block *self,
                                 unsigned long action, void *hcpu)
 {
@@ -1523,6 +1598,7 @@ rcu_torture_cleanup(void)
        fullstop = FULLSTOP_RMMOD;
        mutex_unlock(&fullstop_mutex);
        unregister_reboot_notifier(&rcutorture_shutdown_nb);
+       rcu_torture_stall_cleanup();
        if (stutter_task) {
                VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
                kthread_stop(stutter_task);
@@ -1602,6 +1678,10 @@ rcu_torture_cleanup(void)
                cur_ops->cleanup();
        if (atomic_read(&n_rcu_torture_error))
                rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
+       else if (n_online_successes != n_online_attempts ||
+                n_offline_successes != n_offline_attempts)
+               rcu_torture_print_module_parms(cur_ops,
+                                              "End of test: RCU_HOTPLUG");
        else
                rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
 }
@@ -1819,6 +1899,7 @@ rcu_torture_init(void)
        }
        rcu_torture_onoff_init();
        register_reboot_notifier(&rcutorture_shutdown_nb);
+       rcu_torture_stall_init();
        rcutorture_record_test_transition();
        mutex_unlock(&fullstop_mutex);
        return 0;
index 6c4a672..1050d6d 100644 (file)
@@ -50,6 +50,8 @@
 #include <linux/wait.h>
 #include <linux/kthread.h>
 #include <linux/prefetch.h>
+#include <linux/delay.h>
+#include <linux/stop_machine.h>
 
 #include "rcutree.h"
 #include <trace/events/rcu.h>
@@ -196,7 +198,7 @@ void rcu_note_context_switch(int cpu)
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
-       .dynticks_nesting = DYNTICK_TASK_NESTING,
+       .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
        .dynticks = ATOMIC_INIT(1),
 };
 
@@ -208,8 +210,11 @@ module_param(blimit, int, 0);
 module_param(qhimark, int, 0);
 module_param(qlowmark, int, 0);
 
-int rcu_cpu_stall_suppress __read_mostly;
+int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
+int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
+
 module_param(rcu_cpu_stall_suppress, int, 0644);
+module_param(rcu_cpu_stall_timeout, int, 0644);
 
 static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
 static int rcu_pending(int cpu);
@@ -301,8 +306,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
        return &rsp->node[0];
 }
 
-#ifdef CONFIG_SMP
-
 /*
  * If the specified CPU is offline, tell the caller that it is in
  * a quiescent state.  Otherwise, whack it with a reschedule IPI.
@@ -317,30 +320,21 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static int rcu_implicit_offline_qs(struct rcu_data *rdp)
 {
        /*
-        * If the CPU is offline, it is in a quiescent state.  We can
-        * trust its state not to change because interrupts are disabled.
+        * If the CPU is offline for more than a jiffy, it is in a quiescent
+        * state.  We can trust its state not to change because interrupts
+        * are disabled.  The reason for the jiffy's worth of slack is to
+        * handle CPUs initializing on the way up and finding their way
+        * to the idle loop on the way down.
         */
-       if (cpu_is_offline(rdp->cpu)) {
+       if (cpu_is_offline(rdp->cpu) &&
+           ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) {
                trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
                rdp->offline_fqs++;
                return 1;
        }
-
-       /*
-        * The CPU is online, so send it a reschedule IPI.  This forces
-        * it through the scheduler, and (inefficiently) also handles cases
-        * where idle loops fail to inform RCU about the CPU being idle.
-        */
-       if (rdp->cpu != smp_processor_id())
-               smp_send_reschedule(rdp->cpu);
-       else
-               set_need_resched();
-       rdp->resched_ipi++;
        return 0;
 }
 
-#endif /* #ifdef CONFIG_SMP */
-
 /*
  * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
  *
@@ -366,6 +360,17 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
        atomic_inc(&rdtp->dynticks);
        smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
        WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+
+       /*
+        * The idle task is not permitted to enter the idle loop while
+        * in an RCU read-side critical section.
+        */
+       rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
+                          "Illegal idle entry in RCU read-side critical section.");
+       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),
+                          "Illegal idle entry in RCU-bh read-side critical section.");
+       rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),
+                          "Illegal idle entry in RCU-sched read-side critical section.");
 }
 
 /**
@@ -389,10 +394,15 @@ void rcu_idle_enter(void)
        local_irq_save(flags);
        rdtp = &__get_cpu_var(rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
-       rdtp->dynticks_nesting = 0;
+       WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
+       if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+               rdtp->dynticks_nesting = 0;
+       else
+               rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
        rcu_idle_enter_common(rdtp, oldval);
        local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
 /**
  * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
@@ -462,7 +472,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
  * Exit idle mode, in other words, -enter- the mode in which RCU
  * read-side critical sections can occur.
  *
- * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to
+ * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_EXIT_IDLE to
  * allow for the possibility of usermode upcalls messing up our count
  * of interrupt nesting level during the busy period that is just
  * now starting.
@@ -476,11 +486,15 @@ void rcu_idle_exit(void)
        local_irq_save(flags);
        rdtp = &__get_cpu_var(rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
-       WARN_ON_ONCE(oldval != 0);
-       rdtp->dynticks_nesting = DYNTICK_TASK_NESTING;
+       WARN_ON_ONCE(oldval < 0);
+       if (oldval & DYNTICK_TASK_NEST_MASK)
+               rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
+       else
+               rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
        rcu_idle_exit_common(rdtp, oldval);
        local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(rcu_idle_exit);
 
 /**
  * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
@@ -581,6 +595,49 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Is the current CPU online?  Disable preemption to avoid false positives
+ * that could otherwise happen due to the current CPU number being sampled,
+ * this task being preempted, its old CPU being taken offline, resuming
+ * on some other CPU, then determining that its old CPU is now offline.
+ * It is OK to use RCU on an offline processor during initial boot, hence
+ * the check for rcu_scheduler_fully_active.  Note also that it is OK
+ * for a CPU coming online to use RCU for one jiffy prior to marking itself
+ * online in the cpu_online_mask.  Similarly, it is OK for a CPU going
+ * offline to continue to use RCU for one jiffy after marking itself
+ * offline in the cpu_online_mask.  This leniency is necessary given the
+ * non-atomic nature of the online and offline processing, for example,
+ * the fact that a CPU enters the scheduler after completing the CPU_DYING
+ * notifiers.
+ *
+ * This is also why RCU internally marks CPUs online during the
+ * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
+ *
+ * Disable checking if in an NMI handler because we cannot safely report
+ * errors from NMI handlers anyway.
+ */
+bool rcu_lockdep_current_cpu_online(void)
+{
+       struct rcu_data *rdp;
+       struct rcu_node *rnp;
+       bool ret;
+
+       if (in_nmi())
+               return 1;
+       preempt_disable();
+       rdp = &__get_cpu_var(rcu_sched_data);
+       rnp = rdp->mynode;
+       ret = (rdp->grpmask & rnp->qsmaskinit) ||
+             !rcu_scheduler_fully_active;
+       preempt_enable();
+       return ret;
+}
+EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 #endif /* #ifdef CONFIG_PROVE_RCU */
 
 /**
@@ -595,8 +652,6 @@ int rcu_is_cpu_rrupt_from_idle(void)
        return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
 }
 
-#ifdef CONFIG_SMP
-
 /*
  * Snapshot the specified CPU's dynticks counter so that we can later
  * credit them with an implicit quiescent state.  Return 1 if this CPU
@@ -640,12 +695,28 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
        return rcu_implicit_offline_qs(rdp);
 }
 
-#endif /* #ifdef CONFIG_SMP */
+static int jiffies_till_stall_check(void)
+{
+       int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
+
+       /*
+        * Limit check must be consistent with the Kconfig limits
+        * for CONFIG_RCU_CPU_STALL_TIMEOUT.
+        */
+       if (till_stall_check < 3) {
+               ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
+               till_stall_check = 3;
+       } else if (till_stall_check > 300) {
+               ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
+               till_stall_check = 300;
+       }
+       return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
+}
 
 static void record_gp_stall_check_time(struct rcu_state *rsp)
 {
        rsp->gp_start = jiffies;
-       rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+       rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
 }
 
 static void print_other_cpu_stall(struct rcu_state *rsp)
@@ -664,13 +735,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
                return;
        }
-       rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
-
-       /*
-        * Now rat on any tasks that got kicked up to the root rcu_node
-        * due to CPU offlining.
-        */
-       ndetected = rcu_print_task_stall(rnp);
+       rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3;
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
        /*
@@ -678,8 +743,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
         * See Documentation/RCU/stallwarn.txt for info on how to debug
         * RCU CPU stall warnings.
         */
-       printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {",
+       printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks:",
               rsp->name);
+       print_cpu_stall_info_begin();
        rcu_for_each_leaf_node(rsp, rnp) {
                raw_spin_lock_irqsave(&rnp->lock, flags);
                ndetected += rcu_print_task_stall(rnp);
@@ -688,11 +754,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
                        continue;
                for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
                        if (rnp->qsmask & (1UL << cpu)) {
-                               printk(" %d", rnp->grplo + cpu);
+                               print_cpu_stall_info(rsp, rnp->grplo + cpu);
                                ndetected++;
                        }
        }
-       printk("} (detected by %d, t=%ld jiffies)\n",
+
+       /*
+        * Now rat on any tasks that got kicked up to the root rcu_node
+        * due to CPU offlining.
+        */
+       rnp = rcu_get_root(rsp);
+       raw_spin_lock_irqsave(&rnp->lock, flags);
+       ndetected = rcu_print_task_stall(rnp);
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+
+       print_cpu_stall_info_end();
+       printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n",
               smp_processor_id(), (long)(jiffies - rsp->gp_start));
        if (ndetected == 0)
                printk(KERN_ERR "INFO: Stall ended before state dump start\n");
@@ -716,15 +793,18 @@ static void print_cpu_stall(struct rcu_state *rsp)
         * See Documentation/RCU/stallwarn.txt for info on how to debug
         * RCU CPU stall warnings.
         */
-       printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
-              rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
+       printk(KERN_ERR "INFO: %s self-detected stall on CPU", rsp->name);
+       print_cpu_stall_info_begin();
+       print_cpu_stall_info(rsp, smp_processor_id());
+       print_cpu_stall_info_end();
+       printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start);
        if (!trigger_all_cpu_backtrace())
                dump_stack();
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
        if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
-               rsp->jiffies_stall =
-                       jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+               rsp->jiffies_stall = jiffies +
+                                    3 * jiffies_till_stall_check() + 3;
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
        set_need_resched();  /* kick ourselves to get things going. */
@@ -807,6 +887,7 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct
                        rdp->passed_quiesce = 0;
                } else
                        rdp->qs_pending = 0;
+               zero_cpu_stall_ticks(rdp);
        }
 }
 
@@ -943,6 +1024,10 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
  * in preparation for detecting the next grace period.  The caller must hold
  * the root node's ->lock, which is released before return.  Hard irqs must
  * be disabled.
+ *
+ * Note that it is legal for a dying CPU (which is marked as offline) to
+ * invoke this function.  This can happen when the dying CPU reports its
+ * quiescent state.
  */
 static void
 rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
@@ -980,26 +1065,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
        rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */
        rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
        record_gp_stall_check_time(rsp);
-
-       /* Special-case the common single-level case. */
-       if (NUM_RCU_NODES == 1) {
-               rcu_preempt_check_blocked_tasks(rnp);
-               rnp->qsmask = rnp->qsmaskinit;
-               rnp->gpnum = rsp->gpnum;
-               rnp->completed = rsp->completed;
-               rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */
-               rcu_start_gp_per_cpu(rsp, rnp, rdp);
-               rcu_preempt_boost_start_gp(rnp);
-               trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
-                                           rnp->level, rnp->grplo,
-                                           rnp->grphi, rnp->qsmask);
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               return;
-       }
-
        raw_spin_unlock(&rnp->lock);  /* leave irqs disabled. */
 
-
        /* Exclude any concurrent CPU-hotplug operations. */
        raw_spin_lock(&rsp->onofflock);  /* irqs already disabled. */
 
@@ -1245,53 +1312,115 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 
 /*
  * Move a dying CPU's RCU callbacks to online CPU's callback list.
- * Synchronization is not required because this function executes
- * in stop_machine() context.
+ * Also record a quiescent state for this CPU for the current grace period.
+ * Synchronization and interrupt disabling are not required because
+ * this function executes in stop_machine() context.  Therefore, cleanup
+ * operations that might block must be done later from the CPU_DEAD
+ * notifier.
+ *
+ * Note that the outgoing CPU's bit has already been cleared in the
+ * cpu_online_mask.  This allows us to arbitrarily pick a callback
+ * destination from the bits set in that mask.
  */
-static void rcu_send_cbs_to_online(struct rcu_state *rsp)
+static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
        int i;
-       /* current DYING CPU is cleared in the cpu_online_mask */
+       unsigned long mask;
        int receive_cpu = cpumask_any(cpu_online_mask);
        struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
        struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
+       RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */
+
+       /* First, adjust the counts. */
+       if (rdp->nxtlist != NULL) {
+               receive_rdp->qlen_lazy += rdp->qlen_lazy;
+               receive_rdp->qlen += rdp->qlen;
+               rdp->qlen_lazy = 0;
+               rdp->qlen = 0;
+       }
 
-       if (rdp->nxtlist == NULL)
-               return;  /* irqs disabled, so comparison is stable. */
+       /*
+        * Next, move ready-to-invoke callbacks to be invoked on some
+        * other CPU.  These will not be required to pass through another
+        * grace period:  They are done, regardless of CPU.
+        */
+       if (rdp->nxtlist != NULL &&
+           rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) {
+               struct rcu_head *oldhead;
+               struct rcu_head **oldtail;
+               struct rcu_head **newtail;
+
+               oldhead = rdp->nxtlist;
+               oldtail = receive_rdp->nxttail[RCU_DONE_TAIL];
+               rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
+               *rdp->nxttail[RCU_DONE_TAIL] = *oldtail;
+               *receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead;
+               newtail = rdp->nxttail[RCU_DONE_TAIL];
+               for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) {
+                       if (receive_rdp->nxttail[i] == oldtail)
+                               receive_rdp->nxttail[i] = newtail;
+                       if (rdp->nxttail[i] == newtail)
+                               rdp->nxttail[i] = &rdp->nxtlist;
+               }
+       }
 
-       *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
-       receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
-       receive_rdp->qlen += rdp->qlen;
-       receive_rdp->n_cbs_adopted += rdp->qlen;
-       rdp->n_cbs_orphaned += rdp->qlen;
+       /*
+        * Finally, put the rest of the callbacks at the end of the list.
+        * The ones that made it partway through get to start over:  We
+        * cannot assume that grace periods are synchronized across CPUs.
+        * (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but
+        * this does not seem compelling.  Not yet, anyway.)
+        */
+       if (rdp->nxtlist != NULL) {
+               *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
+               receive_rdp->nxttail[RCU_NEXT_TAIL] =
+                               rdp->nxttail[RCU_NEXT_TAIL];
+               receive_rdp->n_cbs_adopted += rdp->qlen;
+               rdp->n_cbs_orphaned += rdp->qlen;
+
+               rdp->nxtlist = NULL;
+               for (i = 0; i < RCU_NEXT_SIZE; i++)
+                       rdp->nxttail[i] = &rdp->nxtlist;
+       }
 
-       rdp->nxtlist = NULL;
-       for (i = 0; i < RCU_NEXT_SIZE; i++)
-               rdp->nxttail[i] = &rdp->nxtlist;
-       rdp->qlen = 0;
+       /*
+        * Record a quiescent state for the dying CPU.  This is safe
+        * only because we have already cleared out the callbacks.
+        * (Otherwise, the RCU core might try to schedule the invocation
+        * of callbacks on this now-offline CPU, which would be bad.)
+        */
+       mask = rdp->grpmask;    /* rnp->grplo is constant. */
+       trace_rcu_grace_period(rsp->name,
+                              rnp->gpnum + 1 - !!(rnp->qsmask & mask),
+                              "cpuofl");
+       rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum);
+       /* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */
 }
 
 /*
- * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
- * and move all callbacks from the outgoing CPU to the current one.
+ * The CPU has been completely removed, and some other CPU is reporting
+ * this fact from process context.  Do the remainder of the cleanup.
  * There can only be one CPU hotplug operation at a time, so no other
  * CPU can be attempting to update rcu_cpu_kthread_task.
  */
-static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
+static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
        unsigned long flags;
        unsigned long mask;
        int need_report = 0;
        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-       struct rcu_node *rnp;
+       struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rnp. */
 
+       /* Adjust any no-longer-needed kthreads. */
        rcu_stop_cpu_kthread(cpu);
+       rcu_node_kthread_setaffinity(rnp, -1);
+
+       /* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */
 
        /* Exclude any attempts to start a new grace period. */
        raw_spin_lock_irqsave(&rsp->onofflock, flags);
 
        /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
-       rnp = rdp->mynode;      /* this is the outgoing CPU's rnp. */
        mask = rdp->grpmask;    /* rnp->grplo is constant. */
        do {
                raw_spin_lock(&rnp->lock);      /* irqs already disabled. */
@@ -1299,20 +1428,11 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
                if (rnp->qsmaskinit != 0) {
                        if (rnp != rdp->mynode)
                                raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-                       else
-                               trace_rcu_grace_period(rsp->name,
-                                                      rnp->gpnum + 1 -
-                                                      !!(rnp->qsmask & mask),
-                                                      "cpuofl");
                        break;
                }
-               if (rnp == rdp->mynode) {
-                       trace_rcu_grace_period(rsp->name,
-                                              rnp->gpnum + 1 -
-                                              !!(rnp->qsmask & mask),
-                                              "cpuofl");
+               if (rnp == rdp->mynode)
                        need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
-               } else
+               else
                        raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
                mask = rnp->grpmask;
                rnp = rnp->parent;
@@ -1332,29 +1452,15 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
        if (need_report & RCU_OFL_TASKS_EXP_GP)
                rcu_report_exp_rnp(rsp, rnp, true);
-       rcu_node_kthread_setaffinity(rnp, -1);
-}
-
-/*
- * Remove the specified CPU from the RCU hierarchy and move any pending
- * callbacks that it might have to the current CPU.  This code assumes
- * that at least one CPU in the system will remain running at all times.
- * Any attempt to offline -all- CPUs is likely to strand RCU callbacks.
- */
-static void rcu_offline_cpu(int cpu)
-{
-       __rcu_offline_cpu(cpu, &rcu_sched_state);
-       __rcu_offline_cpu(cpu, &rcu_bh_state);
-       rcu_preempt_offline_cpu(cpu);
 }
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
 
-static void rcu_send_cbs_to_online(struct rcu_state *rsp)
+static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
 }
 
-static void rcu_offline_cpu(int cpu)
+static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
 }
 
@@ -1368,11 +1474,11 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 {
        unsigned long flags;
        struct rcu_head *next, *list, **tail;
-       int bl, count;
+       int bl, count, count_lazy;
 
        /* If no callbacks are ready, just return.*/
        if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
-               trace_rcu_batch_start(rsp->name, 0, 0);
+               trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
                trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
                                    need_resched(), is_idle_task(current),
                                    rcu_is_callbacks_kthread());
@@ -1384,8 +1490,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
         * races with call_rcu() from interrupt handlers.
         */
        local_irq_save(flags);
+       WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
        bl = rdp->blimit;
-       trace_rcu_batch_start(rsp->name, rdp->qlen, bl);
+       trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
        list = rdp->nxtlist;
        rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
        *rdp->nxttail[RCU_DONE_TAIL] = NULL;
@@ -1396,12 +1503,13 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
        local_irq_restore(flags);
 
        /* Invoke callbacks. */
-       count = 0;
+       count = count_lazy = 0;
        while (list) {
                next = list->next;
                prefetch(next);
                debug_rcu_head_unqueue(list);
-               __rcu_reclaim(rsp->name, list);
+               if (__rcu_reclaim(rsp->name, list))
+                       count_lazy++;
                list = next;
                /* Stop only if limit reached and CPU has something to do. */
                if (++count >= bl &&
@@ -1416,6 +1524,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
                            rcu_is_callbacks_kthread());
 
        /* Update count, and requeue any remaining callbacks. */
+       rdp->qlen_lazy -= count_lazy;
        rdp->qlen -= count;
        rdp->n_cbs_invoked += count;
        if (list != NULL) {
@@ -1458,6 +1567,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 void rcu_check_callbacks(int cpu, int user)
 {
        trace_rcu_utilization("Start scheduler-tick");
+       increment_cpu_stall_ticks();
        if (user || rcu_is_cpu_rrupt_from_idle()) {
 
                /*
@@ -1492,8 +1602,6 @@ void rcu_check_callbacks(int cpu, int user)
        trace_rcu_utilization("End scheduler-tick");
 }
 
-#ifdef CONFIG_SMP
-
 /*
  * Scan the leaf rcu_node structures, processing dyntick state for any that
  * have not yet encountered a quiescent state, using the function specified.
@@ -1616,15 +1724,6 @@ unlock_fqs_ret:
        trace_rcu_utilization("End fqs");
 }
 
-#else /* #ifdef CONFIG_SMP */
-
-static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
-{
-       set_need_resched();
-}
-
-#endif /* #else #ifdef CONFIG_SMP */
-
 /*
  * This does the RCU core processing work for the specified rcu_state
  * and rcu_data structures.  This may be called only from the CPU to
@@ -1702,11 +1801,12 @@ static void invoke_rcu_core(void)
 
 static void
 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
-          struct rcu_state *rsp)
+          struct rcu_state *rsp, bool lazy)
 {
        unsigned long flags;
        struct rcu_data *rdp;
 
+       WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
        debug_rcu_head_queue(head);
        head->func = func;
        head->next = NULL;
@@ -1720,18 +1820,21 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
         * a quiescent state betweentimes.
         */
        local_irq_save(flags);
+       WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
        rdp = this_cpu_ptr(rsp->rda);
 
        /* Add the callback to our list. */
        *rdp->nxttail[RCU_NEXT_TAIL] = head;
        rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
        rdp->qlen++;
+       if (lazy)
+               rdp->qlen_lazy++;
 
        if (__is_kfree_rcu_offset((unsigned long)func))
                trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
-                                        rdp->qlen);
+                                        rdp->qlen_lazy, rdp->qlen);
        else
-               trace_rcu_callback(rsp->name, head, rdp->qlen);
+               trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
 
        /* If interrupts were disabled, don't dive into RCU core. */
        if (irqs_disabled_flags(flags)) {
@@ -1778,16 +1881,16 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
  */
 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-       __call_rcu(head, func, &rcu_sched_state);
+       __call_rcu(head, func, &rcu_sched_state, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
 /*
- * Queue an RCU for invocation after a quicker grace period.
+ * Queue an RCU callback for invocation after a quicker grace period.
  */
 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-       __call_rcu(head, func, &rcu_bh_state);
+       __call_rcu(head, func, &rcu_bh_state, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu_bh);
 
@@ -1816,6 +1919,10 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
  */
 void synchronize_sched(void)
 {
+       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
+                          !lock_is_held(&rcu_lock_map) &&
+                          !lock_is_held(&rcu_sched_lock_map),
+                          "Illegal synchronize_sched() in RCU-sched read-side critical section");
        if (rcu_blocking_is_gp())
                return;
        wait_rcu_gp(call_rcu_sched);
@@ -1833,12 +1940,137 @@ EXPORT_SYMBOL_GPL(synchronize_sched);
  */
 void synchronize_rcu_bh(void)
 {
+       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
+                          !lock_is_held(&rcu_lock_map) &&
+                          !lock_is_held(&rcu_sched_lock_map),
+                          "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
        if (rcu_blocking_is_gp())
                return;
        wait_rcu_gp(call_rcu_bh);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
 
+static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
+static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
+
+static int synchronize_sched_expedited_cpu_stop(void *data)
+{
+       /*
+        * There must be a full memory barrier on each affected CPU
+        * between the time that try_stop_cpus() is called and the
+        * time that it returns.
+        *
+        * In the current initial implementation of cpu_stop, the
+        * above condition is already met when the control reaches
+        * this point and the following smp_mb() is not strictly
+        * necessary.  Do smp_mb() anyway for documentation and
+        * robustness against future implementation changes.
+        */
+       smp_mb(); /* See above comment block. */
+       return 0;
+}
+
+/**
+ * synchronize_sched_expedited - Brute-force RCU-sched grace period
+ *
+ * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
+ * approach to force the grace period to end quickly.  This consumes
+ * significant time on all CPUs and is unfriendly to real-time workloads,
+ * and is thus not recommended for any sort of common-case code.  In fact,
+ * if you are using synchronize_sched_expedited() in a loop, please
+ * restructure your code to batch your updates, and then use a single
+ * synchronize_sched() instead.
+ *
+ * Note that it is illegal to call this function while holding any lock
+ * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
+ * to call this function from a CPU-hotplug notifier.  Failing to observe
+ * these restrictions will result in deadlock.
+ *
+ * This implementation can be thought of as an application of ticket
+ * locking to RCU, with sync_sched_expedited_started and
+ * sync_sched_expedited_done taking on the roles of the halves
+ * of the ticket-lock word.  Each task atomically increments
+ * sync_sched_expedited_started upon entry, snapshotting the old value,
+ * then attempts to stop all the CPUs.  If this succeeds, then each
+ * CPU will have executed a context switch, resulting in an RCU-sched
+ * grace period.  We are then done, so we use atomic_cmpxchg() to
+ * update sync_sched_expedited_done to match our snapshot -- but
+ * only if someone else has not already advanced past our snapshot.
+ *
+ * On the other hand, if try_stop_cpus() fails, we check the value
+ * of sync_sched_expedited_done.  If it has advanced past our
+ * initial snapshot, then someone else must have forced a grace period
+ * some time after we took our snapshot.  In this case, our work is
+ * done for us, and we can simply return.  Otherwise, we try again,
+ * but keep our initial snapshot for purposes of checking for someone
+ * doing our work for us.
+ *
+ * If we fail too many times in a row, we fall back to synchronize_sched().
+ */
+void synchronize_sched_expedited(void)
+{
+       int firstsnap, s, snap, trycount = 0;
+
+       /* Note that atomic_inc_return() implies full memory barrier. */
+       firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
+       get_online_cpus();
+       WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
+
+       /*
+        * Each pass through the following loop attempts to force a
+        * context switch on each CPU.
+        */
+       while (try_stop_cpus(cpu_online_mask,
+                            synchronize_sched_expedited_cpu_stop,
+                            NULL) == -EAGAIN) {
+               put_online_cpus();
+
+               /* No joy, try again later.  Or just synchronize_sched(). */
+               if (trycount++ < 10)
+                       udelay(trycount * num_online_cpus());
+               else {
+                       synchronize_sched();
+                       return;
+               }
+
+               /* Check to see if someone else did our work for us. */
+               s = atomic_read(&sync_sched_expedited_done);
+               if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
+                       smp_mb(); /* ensure test happens before caller kfree */
+                       return;
+               }
+
+               /*
+                * Refetching sync_sched_expedited_started allows later
+                * callers to piggyback on our grace period.  We subtract
+                * 1 to get the same token that the last incrementer got.
+                * We retry after they started, so our grace period works
+                * for them, and they started after our first try, so their
+                * grace period works for us.
+                */
+               get_online_cpus();
+               snap = atomic_read(&sync_sched_expedited_started);
+               smp_mb(); /* ensure read is before try_stop_cpus(). */
+       }
+
+       /*
+        * Everyone up to our most recent fetch is covered by our grace
+        * period.  Update the counter, but only if our work is still
+        * relevant -- which it won't be if someone who started later
+        * than we did beat us to the punch.
+        */
+       do {
+               s = atomic_read(&sync_sched_expedited_done);
+               if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
+                       smp_mb(); /* ensure test happens before caller kfree */
+                       break;
+               }
+       } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
+
+       put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
 /*
  * Check to see if there is any immediate RCU-related work to be done
  * by the current CPU, for the specified type of RCU, returning 1 if so.
@@ -1932,7 +2164,7 @@ static int rcu_cpu_has_callbacks(int cpu)
        /* RCU callbacks either ready or pending? */
        return per_cpu(rcu_sched_data, cpu).nxtlist ||
               per_cpu(rcu_bh_data, cpu).nxtlist ||
-              rcu_preempt_needs_cpu(cpu);
+              rcu_preempt_cpu_has_callbacks(cpu);
 }
 
 static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
@@ -2027,9 +2259,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
        rdp->nxtlist = NULL;
        for (i = 0; i < RCU_NEXT_SIZE; i++)
                rdp->nxttail[i] = &rdp->nxtlist;
+       rdp->qlen_lazy = 0;
        rdp->qlen = 0;
        rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
-       WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING);
+       WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
        WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
        rdp->cpu = cpu;
        rdp->rsp = rsp;
@@ -2057,7 +2290,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
        rdp->qlen_last_fqs_check = 0;
        rdp->n_force_qs_snap = rsp->n_force_qs;
        rdp->blimit = blimit;
-       rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING;
+       rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
        atomic_set(&rdp->dynticks->dynticks,
                   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
        rcu_prepare_for_idle_init(cpu);
@@ -2139,16 +2372,18 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
                 * touch any data without introducing corruption. We send the
                 * dying CPU's callbacks to an arbitrarily chosen online CPU.
                 */
-               rcu_send_cbs_to_online(&rcu_bh_state);
-               rcu_send_cbs_to_online(&rcu_sched_state);
-               rcu_preempt_send_cbs_to_online();
+               rcu_cleanup_dying_cpu(&rcu_bh_state);
+               rcu_cleanup_dying_cpu(&rcu_sched_state);
+               rcu_preempt_cleanup_dying_cpu();
                rcu_cleanup_after_idle(cpu);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
-               rcu_offline_cpu(cpu);
+               rcu_cleanup_dead_cpu(cpu, &rcu_bh_state);
+               rcu_cleanup_dead_cpu(cpu, &rcu_sched_state);
+               rcu_preempt_cleanup_dead_cpu(cpu);
                break;
        default:
                break;
index fddff92..cdd1be0 100644 (file)
@@ -239,6 +239,12 @@ struct rcu_data {
        bool            preemptible;    /* Preemptible RCU? */
        struct rcu_node *mynode;        /* This CPU's leaf of hierarchy */
        unsigned long grpmask;          /* Mask to apply to leaf qsmask. */
+#ifdef CONFIG_RCU_CPU_STALL_INFO
+       unsigned long   ticks_this_gp;  /* The number of scheduling-clock */
+                                       /*  ticks this CPU has handled */
+                                       /*  during and after the last grace */
+                                       /* period it is aware of. */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
 
        /* 2) batch handling */
        /*
@@ -265,7 +271,8 @@ struct rcu_data {
         */
        struct rcu_head *nxtlist;
        struct rcu_head **nxttail[RCU_NEXT_SIZE];
-       long            qlen;           /* # of queued callbacks */
+       long            qlen_lazy;      /* # of lazy queued callbacks */
+       long            qlen;           /* # of queued callbacks, incl lazy */
        long            qlen_last_fqs_check;
                                        /* qlen at last check for QS forcing */
        unsigned long   n_cbs_invoked;  /* count of RCU cbs invoked. */
@@ -282,7 +289,6 @@ struct rcu_data {
        /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
        unsigned long dynticks_fqs;     /* Kicked due to dynticks idle. */
        unsigned long offline_fqs;      /* Kicked due to being offline. */
-       unsigned long resched_ipi;      /* Sent a resched IPI. */
 
        /* 5) __rcu_pending() statistics. */
        unsigned long n_rcu_pending;    /* rcu_pending() calls since boot. */
@@ -313,12 +319,6 @@ struct rcu_data {
 #else
 #define RCU_STALL_DELAY_DELTA         0
 #endif
-
-#define RCU_SECONDS_TILL_STALL_CHECK   (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \
-                                       RCU_STALL_DELAY_DELTA)
-                                               /* for rsp->jiffies_stall */
-#define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30)
-                                               /* for rsp->jiffies_stall */
 #define RCU_STALL_RAT_DELAY            2       /* Allow other CPUs time */
                                                /*  to take at least one */
                                                /*  scheduling clock irq */
@@ -438,8 +438,8 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
                                     struct rcu_node *rnp,
                                     struct rcu_data *rdp);
-static void rcu_preempt_offline_cpu(int cpu);
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
+static void rcu_preempt_cleanup_dead_cpu(int cpu);
 static void rcu_preempt_check_callbacks(int cpu);
 static void rcu_preempt_process_callbacks(void);
 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
@@ -448,9 +448,9 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
                               bool wake);
 #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
 static int rcu_preempt_pending(int cpu);
-static int rcu_preempt_needs_cpu(int cpu);
+static int rcu_preempt_cpu_has_callbacks(int cpu);
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
-static void rcu_preempt_send_cbs_to_online(void);
+static void rcu_preempt_cleanup_dying_cpu(void);
 static void __init __rcu_init_preempt(void);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
@@ -471,5 +471,10 @@ static void __cpuinit rcu_prepare_kthreads(int cpu);
 static void rcu_prepare_for_idle_init(int cpu);
 static void rcu_cleanup_after_idle(int cpu);
 static void rcu_prepare_for_idle(int cpu);
+static void print_cpu_stall_info_begin(void);
+static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
+static void print_cpu_stall_info_end(void);
+static void zero_cpu_stall_ticks(struct rcu_data *rdp);
+static void increment_cpu_stall_ticks(void);
 
 #endif /* #ifndef RCU_TREE_NONCORE */
index 8bb35d7..c023464 100644 (file)
@@ -25,7 +25,6 @@
  */
 
 #include <linux/delay.h>
-#include <linux/stop_machine.h>
 
 #define RCU_KTHREAD_PRIO 1
 
@@ -63,7 +62,10 @@ static void __init rcu_bootup_announce_oddness(void)
        printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
 #endif
 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
-       printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
+       printk(KERN_INFO "\tDump stacks of tasks blocking RCU-preempt GP.\n");
+#endif
+#if defined(CONFIG_RCU_CPU_STALL_INFO)
+       printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
 #endif
 #if NUM_RCU_LVL_4 != 0
        printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
@@ -490,6 +492,31 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
 
 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
 
+#ifdef CONFIG_RCU_CPU_STALL_INFO
+
+static void rcu_print_task_stall_begin(struct rcu_node *rnp)
+{
+       printk(KERN_ERR "\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
+              rnp->level, rnp->grplo, rnp->grphi);
+}
+
+static void rcu_print_task_stall_end(void)
+{
+       printk(KERN_CONT "\n");
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
+
+static void rcu_print_task_stall_begin(struct rcu_node *rnp)
+{
+}
+
+static void rcu_print_task_stall_end(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
+
 /*
  * Scan the current list of tasks blocked within RCU read-side critical
  * sections, printing out the tid of each.
@@ -501,12 +528,14 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
 
        if (!rcu_preempt_blocked_readers_cgp(rnp))
                return 0;
+       rcu_print_task_stall_begin(rnp);
        t = list_entry(rnp->gp_tasks,
                       struct task_struct, rcu_node_entry);
        list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
-               printk(" P%d", t->pid);
+               printk(KERN_CONT " P%d", t->pid);
                ndetected++;
        }
+       rcu_print_task_stall_end();
        return ndetected;
 }
 
@@ -581,7 +610,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
         * absolutely necessary, but this is a good performance/complexity
         * tradeoff.
         */
-       if (rcu_preempt_blocked_readers_cgp(rnp))
+       if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
                retval |= RCU_OFL_TASKS_NORM_GP;
        if (rcu_preempted_readers_exp(rnp))
                retval |= RCU_OFL_TASKS_EXP_GP;
@@ -618,16 +647,16 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
        return retval;
 }
 
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 /*
  * Do CPU-offline processing for preemptible RCU.
  */
-static void rcu_preempt_offline_cpu(int cpu)
+static void rcu_preempt_cleanup_dead_cpu(int cpu)
 {
-       __rcu_offline_cpu(cpu, &rcu_preempt_state);
+       rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state);
 }
 
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Check for a quiescent state from the current CPU.  When a task blocks,
  * the task is recorded in the corresponding CPU's rcu_node structure,
@@ -671,10 +700,24 @@ static void rcu_preempt_do_callbacks(void)
  */
 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-       __call_rcu(head, func, &rcu_preempt_state);
+       __call_rcu(head, func, &rcu_preempt_state, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
+/*
+ * Queue an RCU callback for lazy invocation after a grace period.
+ * This will likely be later named something like "call_rcu_lazy()",
+ * but this change will require some way of tagging the lazy RCU
+ * callbacks in the list of pending callbacks.  Until then, this
+ * function may only be called from __kfree_rcu().
+ */
+void kfree_call_rcu(struct rcu_head *head,
+                   void (*func)(struct rcu_head *rcu))
+{
+       __call_rcu(head, func, &rcu_preempt_state, 1);
+}
+EXPORT_SYMBOL_GPL(kfree_call_rcu);
+
 /**
  * synchronize_rcu - wait until a grace period has elapsed.
  *
@@ -688,6 +731,10 @@ EXPORT_SYMBOL_GPL(call_rcu);
  */
 void synchronize_rcu(void)
 {
+       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
+                          !lock_is_held(&rcu_lock_map) &&
+                          !lock_is_held(&rcu_sched_lock_map),
+                          "Illegal synchronize_rcu() in RCU read-side critical section");
        if (!rcu_scheduler_active)
                return;
        wait_rcu_gp(call_rcu);
@@ -788,10 +835,22 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
                rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
 }
 
-/*
- * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
- * is to invoke synchronize_sched_expedited() to push all the tasks to
- * the ->blkd_tasks lists and wait for this list to drain.
+/**
+ * synchronize_rcu_expedited - Brute-force RCU grace period
+ *
+ * Wait for an RCU-preempt grace period, but expedite it.  The basic
+ * idea is to invoke synchronize_sched_expedited() to push all the tasks to
+ * the ->blkd_tasks lists and wait for this list to drain.  This consumes
+ * significant time on all CPUs and is unfriendly to real-time workloads,
+ * so is thus not recommended for any sort of common-case code.
+ * In fact, if you are using synchronize_rcu_expedited() in a loop,
+ * please restructure your code to batch your updates, and then use a
+ * single synchronize_rcu() instead.
+ *
+ * Note that it is illegal to call this function while holding any lock
+ * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
+ * to call this function from a CPU-hotplug notifier.  Failing to observe
+ * these restrictions will result in deadlock.
  */
 void synchronize_rcu_expedited(void)
 {
@@ -869,9 +928,9 @@ static int rcu_preempt_pending(int cpu)
 }
 
 /*
- * Does preemptible RCU need the CPU to stay out of dynticks mode?
+ * Does preemptible RCU have callbacks on this CPU?
  */
-static int rcu_preempt_needs_cpu(int cpu)
+static int rcu_preempt_cpu_has_callbacks(int cpu)
 {
        return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
 }
@@ -894,11 +953,12 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
- * Move preemptible RCU's callbacks from dying CPU to other online CPU.
+ * Move preemptible RCU's callbacks from dying CPU to other online CPU
+ * and record a quiescent state.
  */
-static void rcu_preempt_send_cbs_to_online(void)
+static void rcu_preempt_cleanup_dying_cpu(void)
 {
-       rcu_send_cbs_to_online(&rcu_preempt_state);
+       rcu_cleanup_dying_cpu(&rcu_preempt_state);
 }
 
 /*
@@ -1034,16 +1094,16 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
        return 0;
 }
 
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 /*
  * Because preemptible RCU does not exist, it never needs CPU-offline
  * processing.
  */
-static void rcu_preempt_offline_cpu(int cpu)
+static void rcu_preempt_cleanup_dead_cpu(int cpu)
 {
 }
 
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Because preemptible RCU does not exist, it never has any callbacks
  * to check.
@@ -1061,6 +1121,22 @@ static void rcu_preempt_process_callbacks(void)
 }
 
 /*
+ * Queue an RCU callback for lazy invocation after a grace period.
+ * This will likely be later named something like "call_rcu_lazy()",
+ * but this change will require some way of tagging the lazy RCU
+ * callbacks in the list of pending callbacks.  Until then, this
+ * function may only be called from __kfree_rcu().
+ *
+ * Because there is no preemptible RCU, we use RCU-sched instead.
+ */
+void kfree_call_rcu(struct rcu_head *head,
+                   void (*func)(struct rcu_head *rcu))
+{
+       __call_rcu(head, func, &rcu_sched_state, 1);
+}
+EXPORT_SYMBOL_GPL(kfree_call_rcu);
+
+/*
  * Wait for an rcu-preempt grace period, but make it happen quickly.
  * But because preemptible RCU does not exist, map to rcu-sched.
  */
@@ -1093,9 +1169,9 @@ static int rcu_preempt_pending(int cpu)
 }
 
 /*
- * Because preemptible RCU does not exist, it never needs any CPU.
+ * Because preemptible RCU does not exist, it never has callbacks.
  */
-static int rcu_preempt_needs_cpu(int cpu)
+static int rcu_preempt_cpu_has_callbacks(int cpu)
 {
        return 0;
 }
@@ -1119,9 +1195,9 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
- * Because there is no preemptible RCU, there are no callbacks to move.
+ * Because there is no preemptible RCU, there is no cleanup to do.
  */
-static void rcu_preempt_send_cbs_to_online(void)
+static void rcu_preempt_cleanup_dying_cpu(void)
 {
 }
 
@@ -1823,132 +1899,6 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
 
 #endif /* #else #ifdef CONFIG_RCU_BOOST */
 
-#ifndef CONFIG_SMP
-
-void synchronize_sched_expedited(void)
-{
-       cond_resched();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
-#else /* #ifndef CONFIG_SMP */
-
-static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
-static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
-
-static int synchronize_sched_expedited_cpu_stop(void *data)
-{
-       /*
-        * There must be a full memory barrier on each affected CPU
-        * between the time that try_stop_cpus() is called and the
-        * time that it returns.
-        *
-        * In the current initial implementation of cpu_stop, the
-        * above condition is already met when the control reaches
-        * this point and the following smp_mb() is not strictly
-        * necessary.  Do smp_mb() anyway for documentation and
-        * robustness against future implementation changes.
-        */
-       smp_mb(); /* See above comment block. */
-       return 0;
-}
-
-/*
- * Wait for an rcu-sched grace period to elapse, but use "big hammer"
- * approach to force grace period to end quickly.  This consumes
- * significant time on all CPUs, and is thus not recommended for
- * any sort of common-case code.
- *
- * Note that it is illegal to call this function while holding any
- * lock that is acquired by a CPU-hotplug notifier.  Failing to
- * observe this restriction will result in deadlock.
- *
- * This implementation can be thought of as an application of ticket
- * locking to RCU, with sync_sched_expedited_started and
- * sync_sched_expedited_done taking on the roles of the halves
- * of the ticket-lock word.  Each task atomically increments
- * sync_sched_expedited_started upon entry, snapshotting the old value,
- * then attempts to stop all the CPUs.  If this succeeds, then each
- * CPU will have executed a context switch, resulting in an RCU-sched
- * grace period.  We are then done, so we use atomic_cmpxchg() to
- * update sync_sched_expedited_done to match our snapshot -- but
- * only if someone else has not already advanced past our snapshot.
- *
- * On the other hand, if try_stop_cpus() fails, we check the value
- * of sync_sched_expedited_done.  If it has advanced past our
- * initial snapshot, then someone else must have forced a grace period
- * some time after we took our snapshot.  In this case, our work is
- * done for us, and we can simply return.  Otherwise, we try again,
- * but keep our initial snapshot for purposes of checking for someone
- * doing our work for us.
- *
- * If we fail too many times in a row, we fall back to synchronize_sched().
- */
-void synchronize_sched_expedited(void)
-{
-       int firstsnap, s, snap, trycount = 0;
-
-       /* Note that atomic_inc_return() implies full memory barrier. */
-       firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
-       get_online_cpus();
-
-       /*
-        * Each pass through the following loop attempts to force a
-        * context switch on each CPU.
-        */
-       while (try_stop_cpus(cpu_online_mask,
-                            synchronize_sched_expedited_cpu_stop,
-                            NULL) == -EAGAIN) {
-               put_online_cpus();
-
-               /* No joy, try again later.  Or just synchronize_sched(). */
-               if (trycount++ < 10)
-                       udelay(trycount * num_online_cpus());
-               else {
-                       synchronize_sched();
-                       return;
-               }
-
-               /* Check to see if someone else did our work for us. */
-               s = atomic_read(&sync_sched_expedited_done);
-               if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
-                       smp_mb(); /* ensure test happens before caller kfree */
-                       return;
-               }
-
-               /*
-                * Refetching sync_sched_expedited_started allows later
-                * callers to piggyback on our grace period.  We subtract
-                * 1 to get the same token that the last incrementer got.
-                * We retry after they started, so our grace period works
-                * for them, and they started after our first try, so their
-                * grace period works for us.
-                */
-               get_online_cpus();
-               snap = atomic_read(&sync_sched_expedited_started);
-               smp_mb(); /* ensure read is before try_stop_cpus(). */
-       }
-
-       /*
-        * Everyone up to our most recent fetch is covered by our grace
-        * period.  Update the counter, but only if our work is still
-        * relevant -- which it won't be if someone who started later
-        * than we did beat us to the punch.
-        */
-       do {
-               s = atomic_read(&sync_sched_expedited_done);
-               if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
-                       smp_mb(); /* ensure test happens before caller kfree */
-                       break;
-               }
-       } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
-
-       put_online_cpus();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
-#endif /* #else #ifndef CONFIG_SMP */
-
 #if !defined(CONFIG_RCU_FAST_NO_HZ)
 
 /*
@@ -1981,7 +1931,7 @@ static void rcu_cleanup_after_idle(int cpu)
 }
 
 /*
- * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=y,
+ * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
  * is nothing.
  */
 static void rcu_prepare_for_idle(int cpu)
@@ -2015,6 +1965,9 @@ static void rcu_prepare_for_idle(int cpu)
  *     number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
  *     system.  And if you are -that- concerned about energy efficiency,
  *     just power the system down and be done with it!
+ * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is
+ *     permitted to sleep in dyntick-idle mode with only lazy RCU
+ *     callbacks pending.  Setting this too high can OOM your system.
  *
  * The values below work well in practice.  If future workloads require
  * adjustment, they can be converted into kernel config parameters, though
@@ -2023,11 +1976,13 @@ static void rcu_prepare_for_idle(int cpu)
 #define RCU_IDLE_FLUSHES 5             /* Number of dyntick-idle tries. */
 #define RCU_IDLE_OPT_FLUSHES 3         /* Optional dyntick-idle tries. */
 #define RCU_IDLE_GP_DELAY 6            /* Roughly one grace period. */
+#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)        /* Roughly six seconds. */
 
 static DEFINE_PER_CPU(int, rcu_dyntick_drain);
 static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
 static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer);
-static ktime_t rcu_idle_gp_wait;
+static ktime_t rcu_idle_gp_wait;       /* If some non-lazy callbacks. */
+static ktime_t rcu_idle_lazy_gp_wait;  /* If only lazy callbacks. */
 
 /*
  * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
@@ -2048,6 +2003,48 @@ int rcu_needs_cpu(int cpu)
 }
 
 /*
+ * Does the specified flavor of RCU have non-lazy callbacks pending on
+ * the specified CPU?  Both RCU flavor and CPU are specified by the
+ * rcu_data structure.
+ */
+static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp)
+{
+       return rdp->qlen != rdp->qlen_lazy;
+}
+
+#ifdef CONFIG_TREE_PREEMPT_RCU
+
+/*
+ * Are there non-lazy RCU-preempt callbacks?  (There cannot be if there
+ * is no RCU-preempt in the kernel.)
+ */
+static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
+{
+       struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
+
+       return __rcu_cpu_has_nonlazy_callbacks(rdp);
+}
+
+#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
+{
+       return 0;
+}
+
+#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+/*
+ * Does any flavor of RCU have non-lazy callbacks on the specified CPU?
+ */
+static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
+{
+       return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) ||
+              __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) ||
+              rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
+}
+
+/*
  * Timer handler used to force CPU to start pushing its remaining RCU
  * callbacks in the case where it entered dyntick-idle mode with callbacks
  * pending.  The hander doesn't really need to do anything because the
@@ -2074,6 +2071,8 @@ static void rcu_prepare_for_idle_init(int cpu)
                unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY);
 
                rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000);
+               upj = jiffies_to_usecs(RCU_IDLE_LAZY_GP_DELAY);
+               rcu_idle_lazy_gp_wait = ns_to_ktime(upj * (u64)1000);
                firsttime = 0;
        }
 }
@@ -2109,10 +2108,6 @@ static void rcu_cleanup_after_idle(int cpu)
  */
 static void rcu_prepare_for_idle(int cpu)
 {
-       unsigned long flags;
-
-       local_irq_save(flags);
-
        /*
         * If there are no callbacks on this CPU, enter dyntick-idle mode.
         * Also reset state to avoid prejudicing later attempts.
@@ -2120,7 +2115,6 @@ static void rcu_prepare_for_idle(int cpu)
        if (!rcu_cpu_has_callbacks(cpu)) {
                per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
                per_cpu(rcu_dyntick_drain, cpu) = 0;
-               local_irq_restore(flags);
                trace_rcu_prep_idle("No callbacks");
                return;
        }
@@ -2130,7 +2124,6 @@ static void rcu_prepare_for_idle(int cpu)
         * refrained from disabling the scheduling-clock tick.
         */
        if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) {
-               local_irq_restore(flags);
                trace_rcu_prep_idle("In holdoff");
                return;
        }
@@ -2140,18 +2133,22 @@ static void rcu_prepare_for_idle(int cpu)
                /* First time through, initialize the counter. */
                per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES;
        } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES &&
-                  !rcu_pending(cpu)) {
+                  !rcu_pending(cpu) &&
+                  !local_softirq_pending()) {
                /* Can we go dyntick-idle despite still having callbacks? */
                trace_rcu_prep_idle("Dyntick with callbacks");
                per_cpu(rcu_dyntick_drain, cpu) = 0;
-               per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
-               hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
-                             rcu_idle_gp_wait, HRTIMER_MODE_REL);
+               per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
+               if (rcu_cpu_has_nonlazy_callbacks(cpu))
+                       hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
+                                     rcu_idle_gp_wait, HRTIMER_MODE_REL);
+               else
+                       hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
+                                     rcu_idle_lazy_gp_wait, HRTIMER_MODE_REL);
                return; /* Nothing more to do immediately. */
        } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
                /* We have hit the limit, so time to give up. */
                per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
-               local_irq_restore(flags);
                trace_rcu_prep_idle("Begin holdoff");
                invoke_rcu_core();  /* Force the CPU out of dyntick-idle. */
                return;
@@ -2163,23 +2160,17 @@ static void rcu_prepare_for_idle(int cpu)
         */
 #ifdef CONFIG_TREE_PREEMPT_RCU
        if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
-               local_irq_restore(flags);
                rcu_preempt_qs(cpu);
                force_quiescent_state(&rcu_preempt_state, 0);
-               local_irq_save(flags);
        }
 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
        if (per_cpu(rcu_sched_data, cpu).nxtlist) {
-               local_irq_restore(flags);
                rcu_sched_qs(cpu);
                force_quiescent_state(&rcu_sched_state, 0);
-               local_irq_save(flags);
        }
        if (per_cpu(rcu_bh_data, cpu).nxtlist) {
-               local_irq_restore(flags);
                rcu_bh_qs(cpu);
                force_quiescent_state(&rcu_bh_state, 0);
-               local_irq_save(flags);
        }
 
        /*
@@ -2187,13 +2178,124 @@ static void rcu_prepare_for_idle(int cpu)
         * So try forcing the callbacks through the grace period.
         */
        if (rcu_cpu_has_callbacks(cpu)) {
-               local_irq_restore(flags);
                trace_rcu_prep_idle("More callbacks");
                invoke_rcu_core();
-       } else {
-               local_irq_restore(flags);
+       } else
                trace_rcu_prep_idle("Callbacks drained");
-       }
 }
 
 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
+
+#ifdef CONFIG_RCU_CPU_STALL_INFO
+
+#ifdef CONFIG_RCU_FAST_NO_HZ
+
+static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
+{
+       struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu);
+
+       sprintf(cp, "drain=%d %c timer=%lld",
+               per_cpu(rcu_dyntick_drain, cpu),
+               per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.',
+               hrtimer_active(hrtp)
+                       ? ktime_to_us(hrtimer_get_remaining(hrtp))
+                       : -1);
+}
+
+#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+
+static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
+
+/* Initiate the stall-info list. */
+static void print_cpu_stall_info_begin(void)
+{
+       printk(KERN_CONT "\n");
+}
+
+/*
+ * Print out diagnostic information for the specified stalled CPU.
+ *
+ * If the specified CPU is aware of the current RCU grace period
+ * (flavor specified by rsp), then print the number of scheduling
+ * clock interrupts the CPU has taken during the time that it has
+ * been aware.  Otherwise, print the number of RCU grace periods
+ * that this CPU is ignorant of, for example, "1" if the CPU was
+ * aware of the previous grace period.
+ *
+ * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
+ */
+static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
+{
+       char fast_no_hz[72];
+       struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+       struct rcu_dynticks *rdtp = rdp->dynticks;
+       char *ticks_title;
+       unsigned long ticks_value;
+
+       if (rsp->gpnum == rdp->gpnum) {
+               ticks_title = "ticks this GP";
+               ticks_value = rdp->ticks_this_gp;
+       } else {
+               ticks_title = "GPs behind";
+               ticks_value = rsp->gpnum - rdp->gpnum;
+       }
+       print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
+       printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n",
+              cpu, ticks_value, ticks_title,
+              atomic_read(&rdtp->dynticks) & 0xfff,
+              rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
+              fast_no_hz);
+}
+
+/* Terminate the stall-info list. */
+static void print_cpu_stall_info_end(void)
+{
+       printk(KERN_ERR "\t");
+}
+
+/* Zero ->ticks_this_gp for the specified rcu_data structure. */
+static void zero_cpu_stall_ticks(struct rcu_data *rdp)
+{
+       rdp->ticks_this_gp = 0;
+}
+
+/* Increment ->ticks_this_gp for all flavors of RCU. */
+static void increment_cpu_stall_ticks(void)
+{
+       __get_cpu_var(rcu_sched_data).ticks_this_gp++;
+       __get_cpu_var(rcu_bh_data).ticks_this_gp++;
+#ifdef CONFIG_TREE_PREEMPT_RCU
+       __get_cpu_var(rcu_preempt_data).ticks_this_gp++;
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
+
+static void print_cpu_stall_info_begin(void)
+{
+       printk(KERN_CONT " {");
+}
+
+static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
+{
+       printk(KERN_CONT " %d", cpu);
+}
+
+static void print_cpu_stall_info_end(void)
+{
+       printk(KERN_CONT "} ");
+}
+
+static void zero_cpu_stall_ticks(struct rcu_data *rdp)
+{
+}
+
+static void increment_cpu_stall_ticks(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
index 654cfe6..ed459ed 100644 (file)
@@ -72,9 +72,9 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
                   rdp->dynticks->dynticks_nesting,
                   rdp->dynticks->dynticks_nmi_nesting,
                   rdp->dynticks_fqs);
-       seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
-       seq_printf(m, " ql=%ld qs=%c%c%c%c",
-                  rdp->qlen,
+       seq_printf(m, " of=%lu", rdp->offline_fqs);
+       seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
+                  rdp->qlen_lazy, rdp->qlen,
                   ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
                        rdp->nxttail[RCU_NEXT_TAIL]],
                   ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
@@ -144,8 +144,8 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
                   rdp->dynticks->dynticks_nesting,
                   rdp->dynticks->dynticks_nmi_nesting,
                   rdp->dynticks_fqs);
-       seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
-       seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen,
+       seq_printf(m, ",%lu", rdp->offline_fqs);
+       seq_printf(m, ",%ld,%ld,\"%c%c%c%c\"", rdp->qlen_lazy, rdp->qlen,
                   ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
                        rdp->nxttail[RCU_NEXT_TAIL]],
                   ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
@@ -168,7 +168,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
 {
        seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
        seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
-       seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\"");
+       seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\"");
 #ifdef CONFIG_RCU_BOOST
        seq_puts(m, "\"kt\",\"ktl\"");
 #endif /* #ifdef CONFIG_RCU_BOOST */
index e8a1f83..0984a21 100644 (file)
@@ -195,20 +195,20 @@ __setup("noautogroup", setup_autogroup);
 
 #ifdef CONFIG_PROC_FS
 
-int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice)
+int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 {
        static unsigned long next = INITIAL_JIFFIES;
        struct autogroup *ag;
        int err;
 
-       if (*nice < -20 || *nice > 19)
+       if (nice < -20 || nice > 19)
                return -EINVAL;
 
-       err = security_task_setnice(current, *nice);
+       err = security_task_setnice(current, nice);
        if (err)
                return err;
 
-       if (*nice < 0 && !can_nice(current, *nice))
+       if (nice < 0 && !can_nice(current, nice))
                return -EPERM;
 
        /* this is a heavy operation taking global locks.. */
@@ -219,9 +219,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice)
        ag = autogroup_task_get(p);
 
        down_write(&ag->lock);
-       err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]);
+       err = sched_group_set_shares(ag->tg, prio_to_weight[nice + 20]);
        if (!err)
-               ag->nice = *nice;
+               ag->nice = nice;
        up_write(&ag->lock);
 
        autogroup_kref_put(ag);
index 33a0676..d2bd464 100644 (file)
@@ -162,13 +162,13 @@ static int sched_feat_show(struct seq_file *m, void *v)
 
 #ifdef HAVE_JUMP_LABEL
 
-#define jump_label_key__true  jump_label_key_enabled
-#define jump_label_key__false jump_label_key_disabled
+#define jump_label_key__true  STATIC_KEY_INIT_TRUE
+#define jump_label_key__false STATIC_KEY_INIT_FALSE
 
 #define SCHED_FEAT(name, enabled)      \
        jump_label_key__##enabled ,
 
-struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = {
+struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
 #include "features.h"
 };
 
@@ -176,14 +176,14 @@ struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = {
 
 static void sched_feat_disable(int i)
 {
-       if (jump_label_enabled(&sched_feat_keys[i]))
-               jump_label_dec(&sched_feat_keys[i]);
+       if (static_key_enabled(&sched_feat_keys[i]))
+               static_key_slow_dec(&sched_feat_keys[i]);
 }
 
 static void sched_feat_enable(int i)
 {
-       if (!jump_label_enabled(&sched_feat_keys[i]))
-               jump_label_inc(&sched_feat_keys[i]);
+       if (!static_key_enabled(&sched_feat_keys[i]))
+               static_key_slow_inc(&sched_feat_keys[i]);
 }
 #else
 static void sched_feat_disable(int i) { };
@@ -894,7 +894,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
        delta -= irq_delta;
 #endif
 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
-       if (static_branch((&paravirt_steal_rq_enabled))) {
+       if (static_key_false((&paravirt_steal_rq_enabled))) {
                u64 st;
 
                steal = paravirt_steal_clock(cpu_of(rq));
@@ -1284,7 +1284,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
         * leave kernel.
         */
        if (p->mm && printk_ratelimit()) {
-               printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n",
+               printk_sched("process %d (%s) no longer affine to cpu%d\n",
                                task_pid_nr(p), p->comm, cpu);
        }
 
@@ -1507,7 +1507,7 @@ static int ttwu_activate_remote(struct task_struct *p, int wake_flags)
 }
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
 
-static inline int ttwu_share_cache(int this_cpu, int that_cpu)
+bool cpus_share_cache(int this_cpu, int that_cpu)
 {
        return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
 }
@@ -1518,7 +1518,7 @@ static void ttwu_queue(struct task_struct *p, int cpu)
        struct rq *rq = cpu_rq(cpu);
 
 #if defined(CONFIG_SMP)
-       if (sched_feat(TTWU_QUEUE) && !ttwu_share_cache(smp_processor_id(), cpu)) {
+       if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
                sched_clock_cpu(cpu); /* sync clocks x-cpu */
                ttwu_queue_remote(p, cpu);
                return;
@@ -2266,13 +2266,10 @@ calc_load_n(unsigned long load, unsigned long exp,
  * Once we've updated the global active value, we need to apply the exponential
  * weights adjusted to the number of cycles missed.
  */
-static void calc_global_nohz(unsigned long ticks)
+static void calc_global_nohz(void)
 {
        long delta, active, n;
 
-       if (time_before(jiffies, calc_load_update))
-               return;
-
        /*
         * If we crossed a calc_load_update boundary, make sure to fold
         * any pending idle changes, the respective CPUs might have
@@ -2284,31 +2281,25 @@ static void calc_global_nohz(unsigned long ticks)
                atomic_long_add(delta, &calc_load_tasks);
 
        /*
-        * If we were idle for multiple load cycles, apply them.
+        * It could be that the one fold was all it took; we're done!
         */
-       if (ticks >= LOAD_FREQ) {
-               n = ticks / LOAD_FREQ;
+       if (time_before(jiffies, calc_load_update + 10))
+               return;
 
-               active = atomic_long_read(&calc_load_tasks);
-               active = active > 0 ? active * FIXED_1 : 0;
+       /*
+        * Catch-up, fold however many we are behind still
+        */
+       delta = jiffies - calc_load_update - 10;
+       n = 1 + (delta / LOAD_FREQ);
 
-               avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
-               avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
-               avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+       active = atomic_long_read(&calc_load_tasks);
+       active = active > 0 ? active * FIXED_1 : 0;
 
-               calc_load_update += n * LOAD_FREQ;
-       }
+       avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+       avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+       avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
 
-       /*
-        * Its possible the remainder of the above division also crosses
-        * a LOAD_FREQ period, the regular check in calc_global_load()
-        * which comes after this will take care of that.
-        *
-        * Consider us being 11 ticks before a cycle completion, and us
-        * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
-        * age us 4 cycles, and the test in calc_global_load() will
-        * pick up the final one.
-        */
+       calc_load_update += n * LOAD_FREQ;
 }
 #else
 void calc_load_account_idle(struct rq *this_rq)
@@ -2320,7 +2311,7 @@ static inline long calc_load_fold_idle(void)
        return 0;
 }
 
-static void calc_global_nohz(unsigned long ticks)
+static void calc_global_nohz(void)
 {
 }
 #endif
@@ -2348,8 +2339,6 @@ void calc_global_load(unsigned long ticks)
 {
        long active;
 
-       calc_global_nohz(ticks);
-
        if (time_before(jiffies, calc_load_update + 10))
                return;
 
@@ -2361,6 +2350,16 @@ void calc_global_load(unsigned long ticks)
        avenrun[2] = calc_load(avenrun[2], EXP_15, active);
 
        calc_load_update += LOAD_FREQ;
+
+       /*
+        * Account one period with whatever state we found before
+        * folding in the nohz state and ageing the entire idle period.
+        *
+        * This avoids losing a sample when we go idle between
+        * calc_load_account_active() (10 ticks ago) and now and thus
+        * under-accounting.
+        */
+       calc_global_nohz();
 }
 
 /*
@@ -2755,7 +2754,7 @@ void account_idle_time(cputime_t cputime)
 static __always_inline bool steal_account_process_tick(void)
 {
 #ifdef CONFIG_PARAVIRT
-       if (static_branch(&paravirt_steal_enabled)) {
+       if (static_key_false(&paravirt_steal_enabled)) {
                u64 steal, st = 0;
 
                steal = paravirt_steal_clock(smp_processor_id());
@@ -3220,14 +3219,14 @@ need_resched:
 
        post_schedule(rq);
 
-       preempt_enable_no_resched();
+       sched_preempt_enable_no_resched();
        if (need_resched())
                goto need_resched;
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
 {
-       if (!tsk->state)
+       if (!tsk->state || tsk_is_pi_blocked(tsk))
                return;
        /*
         * If we are going to sleep and we have plugged IO queued,
@@ -3246,6 +3245,18 @@ asmlinkage void __sched schedule(void)
 }
 EXPORT_SYMBOL(schedule);
 
+/**
+ * schedule_preempt_disabled - called with preemption disabled
+ *
+ * Returns with preemption disabled. Note: preempt_count must be 1
+ */
+void __sched schedule_preempt_disabled(void)
+{
+       sched_preempt_enable_no_resched();
+       schedule();
+       preempt_disable();
+}
+
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 
 static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
@@ -3406,9 +3417,9 @@ EXPORT_SYMBOL(__wake_up);
 /*
  * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
  */
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
+void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
 {
-       __wake_up_common(q, mode, 1, 0, NULL);
+       __wake_up_common(q, mode, nr, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked);
 
@@ -3767,6 +3778,24 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
        rq = __task_rq_lock(p);
 
+       /*
+        * Idle task boosting is a no-no in general. There is one
+        * exception, when PREEMPT_RT and NOHZ is active:
+        *
+        * The idle task calls get_next_timer_interrupt() and holds
+        * the timer wheel base->lock on the CPU and another CPU wants
+        * to access the timer (probably to cancel it). We can safely
+        * ignore the boosting request, as the idle CPU runs this code
+        * with interrupts disabled and will complete the lock
+        * protected section without being interrupted. So there is no
+        * real need to boost.
+        */
+       if (unlikely(p == rq->idle)) {
+               WARN_ON(p != rq->curr);
+               WARN_ON(p->pi_blocked_on);
+               goto out_unlock;
+       }
+
        trace_sched_pi_setprio(p, prio);
        oldprio = p->prio;
        prev_class = p->sched_class;
@@ -3790,11 +3819,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
                enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
 
        check_class_changed(rq, p, prev_class, oldprio);
+out_unlock:
        __task_rq_unlock(rq);
 }
-
 #endif
-
 void set_user_nice(struct task_struct *p, long nice)
 {
        int old_prio, delta, on_rq;
@@ -4474,7 +4502,7 @@ SYSCALL_DEFINE0(sched_yield)
        __release(rq->lock);
        spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
        do_raw_spin_unlock(&rq->lock);
-       preempt_enable_no_resched();
+       sched_preempt_enable_no_resched();
 
        schedule();
 
@@ -4548,8 +4576,24 @@ EXPORT_SYMBOL(__cond_resched_softirq);
 /**
  * yield - yield the current processor to other threads.
  *
- * This is a shortcut for kernel-space yielding - it marks the
- * thread runnable and calls sys_sched_yield().
+ * Do not ever use this function, there's a 99% chance you're doing it wrong.
+ *
+ * The scheduler is at all times free to pick the calling task as the most
+ * eligible task to run, if removing the yield() call from your code breaks
+ * it, it's already broken.
+ *
+ * Typical broken usage is:
+ *
+ * while (!event)
+ *     yield();
+ *
+ * where one assumes that yield() will let 'the other' process run that will
+ * make event true. If the current task is a SCHED_FIFO task that will never
+ * happen. Never use yield() as a progress guarantee!!
+ *
+ * If you want to use yield() to wait for something, use wait_event().
+ * If you want to use yield() to be 'nice' for others, use cond_resched().
+ * If you still want to use yield(), do not!
  */
 void __sched yield(void)
 {
@@ -5381,7 +5425,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
                                      unsigned long action, void *hcpu)
 {
        switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_ONLINE:
+       case CPU_STARTING:
        case CPU_DOWN_FAILED:
                set_cpu_active((long)hcpu, true);
                return NOTIFY_OK;
@@ -5753,7 +5797,7 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
  *
  * Also keep a unique ID per domain (we use the first cpu number in
  * the cpumask of the domain), this allows us to quickly tell if
- * two cpus are in the same cache domain, see ttwu_share_cache().
+ * two cpus are in the same cache domain, see cpus_share_cache().
  */
 DEFINE_PER_CPU(struct sched_domain *, sd_llc);
 DEFINE_PER_CPU(int, sd_llc_id);
@@ -6728,7 +6772,7 @@ int __init sched_create_sysfs_power_savings_entries(struct device *dev)
 static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
                             void *hcpu)
 {
-       switch (action) {
+       switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_ONLINE:
        case CPU_DOWN_FAILED:
                cpuset_update_active_cpus();
@@ -6741,7 +6785,7 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
                               void *hcpu)
 {
-       switch (action) {
+       switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
                cpuset_update_active_cpus();
                return NOTIFY_OK;
@@ -6930,6 +6974,9 @@ void __init sched_init(void)
                rq->online = 0;
                rq->idle_stamp = 0;
                rq->avg_idle = 2*sysctl_sched_migration_cost;
+
+               INIT_LIST_HEAD(&rq->cfs_tasks);
+
                rq_attach_root(rq, &def_root_domain);
 #ifdef CONFIG_NO_HZ
                rq->nohz_flags = 0;
index 2a075e1..09acaa1 100644 (file)
@@ -288,7 +288,6 @@ static void print_cpu(struct seq_file *m, int cpu)
 
        P(yld_count);
 
-       P(sched_switch);
        P(sched_count);
        P(sched_goidle);
 #ifdef CONFIG_SMP
index aca16b8..94340c7 100644 (file)
@@ -776,29 +776,16 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
-#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
-static void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-       cfs_rq->task_weight += weight;
-}
-#else
-static inline void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-}
-#endif
-
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
        update_load_add(&cfs_rq->load, se->load.weight);
        if (!parent_entity(se))
                update_load_add(&rq_of(cfs_rq)->load, se->load.weight);
-       if (entity_is_task(se)) {
-               add_cfs_task_weight(cfs_rq, se->load.weight);
-               list_add(&se->group_node, &cfs_rq->tasks);
-       }
+#ifdef CONFIG_SMP
+       if (entity_is_task(se))
+               list_add_tail(&se->group_node, &rq_of(cfs_rq)->cfs_tasks);
+#endif
        cfs_rq->nr_running++;
 }
 
@@ -808,10 +795,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
        update_load_sub(&cfs_rq->load, se->load.weight);
        if (!parent_entity(se))
                update_load_sub(&rq_of(cfs_rq)->load, se->load.weight);
-       if (entity_is_task(se)) {
-               add_cfs_task_weight(cfs_rq, -se->load.weight);
+       if (entity_is_task(se))
                list_del_init(&se->group_node);
-       }
        cfs_rq->nr_running--;
 }
 
@@ -1401,20 +1386,20 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 #ifdef CONFIG_CFS_BANDWIDTH
 
 #ifdef HAVE_JUMP_LABEL
-static struct jump_label_key __cfs_bandwidth_used;
+static struct static_key __cfs_bandwidth_used;
 
 static inline bool cfs_bandwidth_used(void)
 {
-       return static_branch(&__cfs_bandwidth_used);
+       return static_key_false(&__cfs_bandwidth_used);
 }
 
 void account_cfs_bandwidth_used(int enabled, int was_enabled)
 {
        /* only need to count groups transitioning between enabled/!enabled */
        if (enabled && !was_enabled)
-               jump_label_inc(&__cfs_bandwidth_used);
+               static_key_slow_inc(&__cfs_bandwidth_used);
        else if (!enabled && was_enabled)
-               jump_label_dec(&__cfs_bandwidth_used);
+               static_key_slow_dec(&__cfs_bandwidth_used);
 }
 #else /* HAVE_JUMP_LABEL */
 static bool cfs_bandwidth_used(void)
@@ -2672,8 +2657,6 @@ static int select_idle_sibling(struct task_struct *p, int target)
        /*
         * Otherwise, iterate the domains and find an elegible idle cpu.
         */
-       rcu_read_lock();
-
        sd = rcu_dereference(per_cpu(sd_llc, target));
        for_each_lower_domain(sd) {
                sg = sd->groups;
@@ -2695,8 +2678,6 @@ next:
                } while (sg != sd->groups);
        }
 done:
-       rcu_read_unlock();
-
        return target;
 }
 
@@ -2922,7 +2903,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
                return;
 
        /*
-        * This is possible from callers such as pull_task(), in which we
+        * This is possible from callers such as move_task(), in which we
         * unconditionally check_prempt_curr() after an enqueue (which may have
         * lead to a throttle).  This both saves work and prevents false
         * next-buddy nomination below.
@@ -3086,17 +3067,39 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
  * Fair scheduling class load-balancing methods:
  */
 
+static unsigned long __read_mostly max_load_balance_interval = HZ/10;
+
+#define LBF_ALL_PINNED 0x01
+#define LBF_NEED_BREAK 0x02
+
+struct lb_env {
+       struct sched_domain     *sd;
+
+       int                     src_cpu;
+       struct rq               *src_rq;
+
+       int                     dst_cpu;
+       struct rq               *dst_rq;
+
+       enum cpu_idle_type      idle;
+       long                    load_move;
+       unsigned int            flags;
+
+       unsigned int            loop;
+       unsigned int            loop_break;
+       unsigned int            loop_max;
+};
+
 /*
- * pull_task - move a task from a remote runqueue to the local runqueue.
+ * move_task - move a task from one runqueue to another runqueue.
  * Both runqueues must be locked.
  */
-static void pull_task(struct rq *src_rq, struct task_struct *p,
-                     struct rq *this_rq, int this_cpu)
+static void move_task(struct task_struct *p, struct lb_env *env)
 {
-       deactivate_task(src_rq, p, 0);
-       set_task_cpu(p, this_cpu);
-       activate_task(this_rq, p, 0);
-       check_preempt_curr(this_rq, p, 0);
+       deactivate_task(env->src_rq, p, 0);
+       set_task_cpu(p, env->dst_cpu);
+       activate_task(env->dst_rq, p, 0);
+       check_preempt_curr(env->dst_rq, p, 0);
 }
 
 /*
@@ -3131,19 +3134,11 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
        return delta < (s64)sysctl_sched_migration_cost;
 }
 
-#define LBF_ALL_PINNED 0x01
-#define LBF_NEED_BREAK 0x02    /* clears into HAD_BREAK */
-#define LBF_HAD_BREAK  0x04
-#define LBF_HAD_BREAKS 0x0C    /* count HAD_BREAKs overflows into ABORT */
-#define LBF_ABORT      0x10
-
 /*
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
 static
-int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
-                    struct sched_domain *sd, enum cpu_idle_type idle,
-                    int *lb_flags)
+int can_migrate_task(struct task_struct *p, struct lb_env *env)
 {
        int tsk_cache_hot = 0;
        /*
@@ -3152,13 +3147,13 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
         * 2) cannot be migrated to this CPU due to cpus_allowed, or
         * 3) are cache-hot on their current CPU.
         */
-       if (!cpumask_test_cpu(this_cpu, tsk_cpus_allowed(p))) {
+       if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
                schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
                return 0;
        }
-       *lb_flags &= ~LBF_ALL_PINNED;
+       env->flags &= ~LBF_ALL_PINNED;
 
-       if (task_running(rq, p)) {
+       if (task_running(env->src_rq, p)) {
                schedstat_inc(p, se.statistics.nr_failed_migrations_running);
                return 0;
        }
@@ -3169,12 +3164,12 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
         * 2) too many balance attempts have failed.
         */
 
-       tsk_cache_hot = task_hot(p, rq->clock_task, sd);
+       tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd);
        if (!tsk_cache_hot ||
-               sd->nr_balance_failed > sd->cache_nice_tries) {
+               env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
 #ifdef CONFIG_SCHEDSTATS
                if (tsk_cache_hot) {
-                       schedstat_inc(sd, lb_hot_gained[idle]);
+                       schedstat_inc(env->sd, lb_hot_gained[env->idle]);
                        schedstat_inc(p, se.statistics.nr_forced_migrations);
                }
 #endif
@@ -3195,65 +3190,80 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
  *
  * Called with both runqueues locked.
  */
-static int
-move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
-             struct sched_domain *sd, enum cpu_idle_type idle)
+static int move_one_task(struct lb_env *env)
 {
        struct task_struct *p, *n;
-       struct cfs_rq *cfs_rq;
-       int pinned = 0;
 
-       for_each_leaf_cfs_rq(busiest, cfs_rq) {
-               list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) {
-                       if (throttled_lb_pair(task_group(p),
-                                             busiest->cpu, this_cpu))
-                               break;
+       list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
+               if (throttled_lb_pair(task_group(p), env->src_rq->cpu, env->dst_cpu))
+                       continue;
 
-                       if (!can_migrate_task(p, busiest, this_cpu,
-                                               sd, idle, &pinned))
-                               continue;
+               if (!can_migrate_task(p, env))
+                       continue;
 
-                       pull_task(busiest, p, this_rq, this_cpu);
-                       /*
-                        * Right now, this is only the second place pull_task()
-                        * is called, so we can safely collect pull_task()
-                        * stats here rather than inside pull_task().
-                        */
-                       schedstat_inc(sd, lb_gained[idle]);
-                       return 1;
-               }
+               move_task(p, env);
+               /*
+                * Right now, this is only the second place move_task()
+                * is called, so we can safely collect move_task()
+                * stats here rather than inside move_task().
+                */
+               schedstat_inc(env->sd, lb_gained[env->idle]);
+               return 1;
        }
-
        return 0;
 }
 
-static unsigned long
-balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-             unsigned long max_load_move, struct sched_domain *sd,
-             enum cpu_idle_type idle, int *lb_flags,
-             struct cfs_rq *busiest_cfs_rq)
+static unsigned long task_h_load(struct task_struct *p);
+
+/*
+ * move_tasks tries to move up to env->load_move weighted load from
+ * env->src_rq to env->dst_rq, as part of a balancing operation within
+ * domain env->sd. Returns the number of tasks moved, 0 if none.
+ *
+ * Called with both runqueues locked.
+ */
+static int move_tasks(struct lb_env *env)
 {
-       int loops = 0, pulled = 0;
-       long rem_load_move = max_load_move;
-       struct task_struct *p, *n;
+       struct list_head *tasks = &env->src_rq->cfs_tasks;
+       struct task_struct *p;
+       unsigned long load;
+       int pulled = 0;
+
+       if (env->load_move <= 0)
+               return 0;
 
-       if (max_load_move == 0)
-               goto out;
+       while (!list_empty(tasks)) {
+               p = list_first_entry(tasks, struct task_struct, se.group_node);
 
-       list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) {
-               if (loops++ > sysctl_sched_nr_migrate) {
-                       *lb_flags |= LBF_NEED_BREAK;
+               env->loop++;
+               /* We've more or less seen every task there is, call it quits */
+               if (env->loop > env->loop_max)
+                       break;
+
+               /* take a breather every nr_migrate tasks */
+               if (env->loop > env->loop_break) {
+                       env->loop_break += sysctl_sched_nr_migrate;
+                       env->flags |= LBF_NEED_BREAK;
                        break;
                }
 
-               if ((p->se.load.weight >> 1) > rem_load_move ||
-                   !can_migrate_task(p, busiest, this_cpu, sd, idle,
-                                     lb_flags))
-                       continue;
+               if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
+                       goto next;
+
+               load = task_h_load(p);
+
+               if (load < 16 && !env->sd->nr_balance_failed)
+                       goto next;
+
+               if ((load / 2) > env->load_move)
+                       goto next;
 
-               pull_task(busiest, p, this_rq, this_cpu);
+               if (!can_migrate_task(p, env))
+                       goto next;
+
+               move_task(p, env);
                pulled++;
-               rem_load_move -= p->se.load.weight;
+               env->load_move -= load;
 
 #ifdef CONFIG_PREEMPT
                /*
@@ -3261,28 +3271,30 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
                 * kernels will stop after the first task is pulled to minimize
                 * the critical section.
                 */
-               if (idle == CPU_NEWLY_IDLE) {
-                       *lb_flags |= LBF_ABORT;
+               if (env->idle == CPU_NEWLY_IDLE)
                        break;
-               }
 #endif
 
                /*
                 * We only want to steal up to the prescribed amount of
                 * weighted load.
                 */
-               if (rem_load_move <= 0)
+               if (env->load_move <= 0)
                        break;
+
+               continue;
+next:
+               list_move_tail(&p->se.group_node, tasks);
        }
-out:
+
        /*
-        * Right now, this is one of only two places pull_task() is called,
-        * so we can safely collect pull_task() stats here rather than
-        * inside pull_task().
+        * Right now, this is one of only two places move_task() is called,
+        * so we can safely collect move_task() stats here rather than
+        * inside move_task().
         */
-       schedstat_add(sd, lb_gained[idle], pulled);
+       schedstat_add(env->sd, lb_gained[env->idle], pulled);
 
-       return max_load_move - rem_load_move;
+       return pulled;
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -3362,113 +3374,35 @@ static int tg_load_down(struct task_group *tg, void *data)
 
 static void update_h_load(long cpu)
 {
+       rcu_read_lock();
        walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+       rcu_read_unlock();
 }
 
-static unsigned long
-load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                 unsigned long max_load_move,
-                 struct sched_domain *sd, enum cpu_idle_type idle,
-                 int *lb_flags)
+static unsigned long task_h_load(struct task_struct *p)
 {
-       long rem_load_move = max_load_move;
-       struct cfs_rq *busiest_cfs_rq;
-
-       rcu_read_lock();
-       update_h_load(cpu_of(busiest));
-
-       for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
-               unsigned long busiest_h_load = busiest_cfs_rq->h_load;
-               unsigned long busiest_weight = busiest_cfs_rq->load.weight;
-               u64 rem_load, moved_load;
-
-               if (*lb_flags & (LBF_NEED_BREAK|LBF_ABORT))
-                       break;
-
-               /*
-                * empty group or part of a throttled hierarchy
-                */
-               if (!busiest_cfs_rq->task_weight ||
-                   throttled_lb_pair(busiest_cfs_rq->tg, cpu_of(busiest), this_cpu))
-                       continue;
-
-               rem_load = (u64)rem_load_move * busiest_weight;
-               rem_load = div_u64(rem_load, busiest_h_load + 1);
-
-               moved_load = balance_tasks(this_rq, this_cpu, busiest,
-                               rem_load, sd, idle, lb_flags,
-                               busiest_cfs_rq);
-
-               if (!moved_load)
-                       continue;
+       struct cfs_rq *cfs_rq = task_cfs_rq(p);
+       unsigned long load;
 
-               moved_load *= busiest_h_load;
-               moved_load = div_u64(moved_load, busiest_weight + 1);
+       load = p->se.load.weight;
+       load = div_u64(load * cfs_rq->h_load, cfs_rq->load.weight + 1);
 
-               rem_load_move -= moved_load;
-               if (rem_load_move < 0)
-                       break;
-       }
-       rcu_read_unlock();
-
-       return max_load_move - rem_load_move;
+       return load;
 }
 #else
 static inline void update_shares(int cpu)
 {
 }
 
-static unsigned long
-load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                 unsigned long max_load_move,
-                 struct sched_domain *sd, enum cpu_idle_type idle,
-                 int *lb_flags)
+static inline void update_h_load(long cpu)
 {
-       return balance_tasks(this_rq, this_cpu, busiest,
-                       max_load_move, sd, idle, lb_flags,
-                       &busiest->cfs);
 }
-#endif
 
-/*
- * move_tasks tries to move up to max_load_move weighted load from busiest to
- * this_rq, as part of a balancing operation within domain "sd".
- * Returns 1 if successful and 0 otherwise.
- *
- * Called with both runqueues locked.
- */
-static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                     unsigned long max_load_move,
-                     struct sched_domain *sd, enum cpu_idle_type idle,
-                     int *lb_flags)
+static unsigned long task_h_load(struct task_struct *p)
 {
-       unsigned long total_load_moved = 0, load_moved;
-
-       do {
-               load_moved = load_balance_fair(this_rq, this_cpu, busiest,
-                               max_load_move - total_load_moved,
-                               sd, idle, lb_flags);
-
-               total_load_moved += load_moved;
-
-               if (*lb_flags & (LBF_NEED_BREAK|LBF_ABORT))
-                       break;
-
-#ifdef CONFIG_PREEMPT
-               /*
-                * NEWIDLE balancing is a source of latency, so preemptible
-                * kernels will stop after the first task is pulled to minimize
-                * the critical section.
-                */
-               if (idle == CPU_NEWLY_IDLE && this_rq->nr_running) {
-                       *lb_flags |= LBF_ABORT;
-                       break;
-               }
-#endif
-       } while (load_moved && max_load_move > total_load_moved);
-
-       return total_load_moved > 0;
+       return p->se.load.weight;
 }
+#endif
 
 /********** Helpers for find_busiest_group ************************/
 /*
@@ -3778,6 +3712,11 @@ void update_group_power(struct sched_domain *sd, int cpu)
        struct sched_domain *child = sd->child;
        struct sched_group *group, *sdg = sd->groups;
        unsigned long power;
+       unsigned long interval;
+
+       interval = msecs_to_jiffies(sd->balance_interval);
+       interval = clamp(interval, 1UL, max_load_balance_interval);
+       sdg->sgp->next_update = jiffies + interval;
 
        if (!child) {
                update_cpu_power(sd, cpu);
@@ -3885,12 +3824,15 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
         * domains. In the newly idle case, we will allow all the cpu's
         * to do the newly idle load balance.
         */
-       if (idle != CPU_NEWLY_IDLE && local_group) {
-               if (balance_cpu != this_cpu) {
-                       *balance = 0;
-                       return;
-               }
-               update_group_power(sd, this_cpu);
+       if (local_group) {
+               if (idle != CPU_NEWLY_IDLE) {
+                       if (balance_cpu != this_cpu) {
+                               *balance = 0;
+                               return;
+                       }
+                       update_group_power(sd, this_cpu);
+               } else if (time_after_eq(jiffies, group->sgp->next_update))
+                       update_group_power(sd, this_cpu);
        }
 
        /* Adjust by relative CPU power of the group */
@@ -4453,13 +4395,21 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                        struct sched_domain *sd, enum cpu_idle_type idle,
                        int *balance)
 {
-       int ld_moved, lb_flags = 0, active_balance = 0;
+       int ld_moved, active_balance = 0;
        struct sched_group *group;
        unsigned long imbalance;
        struct rq *busiest;
        unsigned long flags;
        struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
+       struct lb_env env = {
+               .sd             = sd,
+               .dst_cpu        = this_cpu,
+               .dst_rq         = this_rq,
+               .idle           = idle,
+               .loop_break     = sysctl_sched_nr_migrate,
+       };
+
        cpumask_copy(cpus, cpu_active_mask);
 
        schedstat_inc(sd, lb_count[idle]);
@@ -4494,32 +4444,34 @@ redo:
                 * still unbalanced. ld_moved simply stays zero, so it is
                 * correctly treated as an imbalance.
                 */
-               lb_flags |= LBF_ALL_PINNED;
+               env.flags |= LBF_ALL_PINNED;
+               env.load_move = imbalance;
+               env.src_cpu = busiest->cpu;
+               env.src_rq = busiest;
+               env.loop_max = busiest->nr_running;
+
+more_balance:
                local_irq_save(flags);
                double_rq_lock(this_rq, busiest);
-               ld_moved = move_tasks(this_rq, this_cpu, busiest,
-                                     imbalance, sd, idle, &lb_flags);
+               if (!env.loop)
+                       update_h_load(env.src_cpu);
+               ld_moved += move_tasks(&env);
                double_rq_unlock(this_rq, busiest);
                local_irq_restore(flags);
 
+               if (env.flags & LBF_NEED_BREAK) {
+                       env.flags &= ~LBF_NEED_BREAK;
+                       goto more_balance;
+               }
+
                /*
                 * some other cpu did the load balance for us.
                 */
                if (ld_moved && this_cpu != smp_processor_id())
                        resched_cpu(this_cpu);
 
-               if (lb_flags & LBF_ABORT)
-                       goto out_balanced;
-
-               if (lb_flags & LBF_NEED_BREAK) {
-                       lb_flags += LBF_HAD_BREAK - LBF_NEED_BREAK;
-                       if (lb_flags & LBF_ABORT)
-                               goto out_balanced;
-                       goto redo;
-               }
-
                /* All tasks on this runqueue were pinned by CPU affinity */
-               if (unlikely(lb_flags & LBF_ALL_PINNED)) {
+               if (unlikely(env.flags & LBF_ALL_PINNED)) {
                        cpumask_clear_cpu(cpu_of(busiest), cpus);
                        if (!cpumask_empty(cpus))
                                goto redo;
@@ -4549,7 +4501,7 @@ redo:
                                        tsk_cpus_allowed(busiest->curr))) {
                                raw_spin_unlock_irqrestore(&busiest->lock,
                                                            flags);
-                               lb_flags |= LBF_ALL_PINNED;
+                               env.flags |= LBF_ALL_PINNED;
                                goto out_one_pinned;
                        }
 
@@ -4602,7 +4554,7 @@ out_balanced:
 
 out_one_pinned:
        /* tune up the balancing interval */
-       if (((lb_flags & LBF_ALL_PINNED) &&
+       if (((env.flags & LBF_ALL_PINNED) &&
                        sd->balance_interval < MAX_PINNED_INTERVAL) ||
                        (sd->balance_interval < sd->max_interval))
                sd->balance_interval *= 2;
@@ -4712,10 +4664,18 @@ static int active_load_balance_cpu_stop(void *data)
        }
 
        if (likely(sd)) {
+               struct lb_env env = {
+                       .sd             = sd,
+                       .dst_cpu        = target_cpu,
+                       .dst_rq         = target_rq,
+                       .src_cpu        = busiest_rq->cpu,
+                       .src_rq         = busiest_rq,
+                       .idle           = CPU_IDLE,
+               };
+
                schedstat_inc(sd, alb_count);
 
-               if (move_one_task(target_rq, target_cpu, busiest_rq,
-                                 sd, CPU_IDLE))
+               if (move_one_task(&env))
                        schedstat_inc(sd, alb_pushed);
                else
                        schedstat_inc(sd, alb_failed);
@@ -4947,8 +4907,6 @@ static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
 
 static DEFINE_SPINLOCK(balancing);
 
-static unsigned long __read_mostly max_load_balance_interval = HZ/10;
-
 /*
  * Scale the max load_balance interval with the number of CPUs in the system.
  * This trades load-balance latency on larger machines for less cross talk.
@@ -5342,7 +5300,6 @@ static void set_curr_task_fair(struct rq *rq)
 void init_cfs_rq(struct cfs_rq *cfs_rq)
 {
        cfs_rq->tasks_timeline = RB_ROOT;
-       INIT_LIST_HEAD(&cfs_rq->tasks);
        cfs_rq->min_vruntime = (u64)(-(1LL << 20));
 #ifndef CONFIG_64BIT
        cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
@@ -5614,6 +5571,7 @@ __init void init_sched_fair_class(void)
        open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
 
 #ifdef CONFIG_NO_HZ
+       nohz.next_balance = jiffies;
        zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
        cpu_notifier(sched_ilb_notifier, 0);
 #endif
index f42ae7f..b60dad7 100644 (file)
@@ -778,12 +778,9 @@ static inline int balance_runtime(struct rt_rq *rt_rq)
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 {
-       int i, idle = 1;
+       int i, idle = 1, throttled = 0;
        const struct cpumask *span;
 
-       if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
-               return 1;
-
        span = sched_rt_period_mask();
        for_each_cpu(i, span) {
                int enqueue = 0;
@@ -818,12 +815,17 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
                        if (!rt_rq_throttled(rt_rq))
                                enqueue = 1;
                }
+               if (rt_rq->rt_throttled)
+                       throttled = 1;
 
                if (enqueue)
                        sched_rt_rq_enqueue(rt_rq);
                raw_spin_unlock(&rq->lock);
        }
 
+       if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
+               return 1;
+
        return idle;
 }
 
@@ -855,8 +857,30 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
                return 0;
 
        if (rt_rq->rt_time > runtime) {
-               rt_rq->rt_throttled = 1;
-               printk_once(KERN_WARNING "sched: RT throttling activated\n");
+               struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+
+               /*
+                * Don't actually throttle groups that have no runtime assigned
+                * but accrue some time due to boosting.
+                */
+               if (likely(rt_b->rt_runtime)) {
+                       static bool once = false;
+
+                       rt_rq->rt_throttled = 1;
+
+                       if (!once) {
+                               once = true;
+                               printk_sched("sched: RT throttling activated\n");
+                       }
+               } else {
+                       /*
+                        * In case we did anyway, make it go away,
+                        * replenishment is a joke, since it will replenish us
+                        * with exactly 0 ns.
+                        */
+                       rt_rq->rt_time = 0;
+               }
+
                if (rt_rq_throttled(rt_rq)) {
                        sched_rt_rq_dequeue(rt_rq);
                        return 1;
@@ -884,7 +908,8 @@ static void update_curr_rt(struct rq *rq)
        if (unlikely((s64)delta_exec < 0))
                delta_exec = 0;
 
-       schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec));
+       schedstat_set(curr->se.statistics.exec_max,
+                     max(curr->se.statistics.exec_max, delta_exec));
 
        curr->se.sum_exec_runtime += delta_exec;
        account_group_exec_runtime(curr, delta_exec);
@@ -1972,7 +1997,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
        if (--p->rt.time_slice)
                return;
 
-       p->rt.time_slice = DEF_TIMESLICE;
+       p->rt.time_slice = RR_TIMESLICE;
 
        /*
         * Requeue to the end of queue if we are not the only element
@@ -2000,7 +2025,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
         * Time slice is 0 for SCHED_FIFO tasks
         */
        if (task->policy == SCHED_RR)
-               return DEF_TIMESLICE;
+               return RR_TIMESLICE;
        else
                return 0;
 }
index 98c0c26..42b1f30 100644 (file)
@@ -36,11 +36,7 @@ extern __read_mostly int scheduler_running;
 
 /*
  * These are the 'tuning knobs' of the scheduler:
- *
- * default timeslice is 100 msecs (used only for SCHED_RR tasks).
- * Timeslices get refilled after they expire.
  */
-#define DEF_TIMESLICE          (100 * HZ / 1000)
 
 /*
  * single value that denotes runtime == period, ie unlimited time.
@@ -216,9 +212,6 @@ struct cfs_rq {
        struct rb_root tasks_timeline;
        struct rb_node *rb_leftmost;
 
-       struct list_head tasks;
-       struct list_head *balance_iterator;
-
        /*
         * 'curr' points to currently running entity on this cfs_rq.
         * It is set to NULL otherwise (i.e when none are currently running).
@@ -246,11 +239,6 @@ struct cfs_rq {
 
 #ifdef CONFIG_SMP
        /*
-        * the part of load.weight contributed by tasks
-        */
-       unsigned long task_weight;
-
-       /*
         *   h_load = weight * f(tg)
         *
         * Where f(tg) is the recursive weight fraction assigned to
@@ -424,6 +412,8 @@ struct rq {
        int cpu;
        int online;
 
+       struct list_head cfs_tasks;
+
        u64 rt_avg;
        u64 age_stamp;
        u64 idle_stamp;
@@ -462,7 +452,6 @@ struct rq {
        unsigned int yld_count;
 
        /* schedule() stats */
-       unsigned int sched_switch;
        unsigned int sched_count;
        unsigned int sched_goidle;
 
@@ -611,7 +600,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
  */
 #ifdef CONFIG_SCHED_DEBUG
-# include <linux/jump_label.h>
+# include <linux/static_key.h>
 # define const_debug __read_mostly
 #else
 # define const_debug const
@@ -630,18 +619,18 @@ enum {
 #undef SCHED_FEAT
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
-static __always_inline bool static_branch__true(struct jump_label_key *key)
+static __always_inline bool static_branch__true(struct static_key *key)
 {
-       return likely(static_branch(key)); /* Not out of line branch. */
+       return static_key_true(key); /* Not out of line branch. */
 }
 
-static __always_inline bool static_branch__false(struct jump_label_key *key)
+static __always_inline bool static_branch__false(struct static_key *key)
 {
-       return unlikely(static_branch(key)); /* Out of line branch. */
+       return static_key_false(key); /* Out of line branch. */
 }
 
 #define SCHED_FEAT(name, enabled)                                      \
-static __always_inline bool static_branch_##name(struct jump_label_key *key) \
+static __always_inline bool static_branch_##name(struct static_key *key) \
 {                                                                      \
        return static_branch__##enabled(key);                           \
 }
@@ -650,7 +639,7 @@ static __always_inline bool static_branch_##name(struct jump_label_key *key) \
 
 #undef SCHED_FEAT
 
-extern struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR];
+extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
 #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
 #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */
 #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
index 2a581ba..903ffa9 100644 (file)
@@ -32,9 +32,9 @@ static int show_schedstat(struct seq_file *seq, void *v)
 
                /* runqueue-specific stats */
                seq_printf(seq,
-                   "cpu%d %u %u %u %u %u %u %llu %llu %lu",
+                   "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
                    cpu, rq->yld_count,
-                   rq->sched_switch, rq->sched_count, rq->sched_goidle,
+                   rq->sched_count, rq->sched_goidle,
                    rq->ttwu_count, rq->ttwu_local,
                    rq->rq_cpu_time,
                    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
index c73c428..8511e39 100644 (file)
@@ -1054,13 +1054,13 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
        struct sigpending *pending;
        struct sigqueue *q;
        int override_rlimit;
-
-       trace_signal_generate(sig, info, t);
+       int ret = 0, result;
 
        assert_spin_locked(&t->sighand->siglock);
 
+       result = TRACE_SIGNAL_IGNORED;
        if (!prepare_signal(sig, t, from_ancestor_ns))
-               return 0;
+               goto ret;
 
        pending = group ? &t->signal->shared_pending : &t->pending;
        /*
@@ -1068,8 +1068,11 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
         * exactly one non-rt signal, so that we can get more
         * detailed information about the cause of the signal.
         */
+       result = TRACE_SIGNAL_ALREADY_PENDING;
        if (legacy_queue(pending, sig))
-               return 0;
+               goto ret;
+
+       result = TRACE_SIGNAL_DELIVERED;
        /*
         * fast-pathed signals for kernel-internal things like SIGSTOP
         * or SIGKILL.
@@ -1127,14 +1130,15 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
                         * signal was rt and sent by user using something
                         * other than kill().
                         */
-                       trace_signal_overflow_fail(sig, group, info);
-                       return -EAGAIN;
+                       result = TRACE_SIGNAL_OVERFLOW_FAIL;
+                       ret = -EAGAIN;
+                       goto ret;
                } else {
                        /*
                         * This is a silent loss of information.  We still
                         * send the signal, but the *info bits are lost.
                         */
-                       trace_signal_lose_info(sig, group, info);
+                       result = TRACE_SIGNAL_LOSE_INFO;
                }
        }
 
@@ -1142,7 +1146,9 @@ out_set:
        signalfd_notify(t, sig);
        sigaddset(&pending->signal, sig);
        complete_signal(sig, t, group);
-       return 0;
+ret:
+       trace_signal_generate(sig, info, t, group, result);
+       return ret;
 }
 
 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
@@ -1585,7 +1591,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
        int sig = q->info.si_signo;
        struct sigpending *pending;
        unsigned long flags;
-       int ret;
+       int ret, result;
 
        BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
 
@@ -1594,6 +1600,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
                goto ret;
 
        ret = 1; /* the signal is ignored */
+       result = TRACE_SIGNAL_IGNORED;
        if (!prepare_signal(sig, t, 0))
                goto out;
 
@@ -1605,6 +1612,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
                 */
                BUG_ON(q->info.si_code != SI_TIMER);
                q->info.si_overrun++;
+               result = TRACE_SIGNAL_ALREADY_PENDING;
                goto out;
        }
        q->info.si_overrun = 0;
@@ -1614,7 +1622,9 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
        list_add_tail(&q->list, &pending->list);
        sigaddset(&pending->signal, sig);
        complete_signal(sig, t, group);
+       result = TRACE_SIGNAL_DELIVERED;
 out:
+       trace_signal_generate(sig, &q->info, t, group, result);
        unlock_task_sighand(t, &flags);
 ret:
        return ret;
index 4eb3a0f..671f959 100644 (file)
@@ -297,7 +297,7 @@ void irq_enter(void)
        int cpu = smp_processor_id();
 
        rcu_irq_enter();
-       if (idle_cpu(cpu) && !in_interrupt()) {
+       if (is_idle_task(current) && !in_interrupt()) {
                /*
                 * Prevent raise_softirq from needlessly waking up ksoftirqd
                 * here, as softirq will be serviced on return from interrupt.
@@ -310,31 +310,21 @@ void irq_enter(void)
        __irq_enter();
 }
 
-#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
 static inline void invoke_softirq(void)
 {
-       if (!force_irqthreads)
+       if (!force_irqthreads) {
+#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
                __do_softirq();
-       else {
-               __local_bh_disable((unsigned long)__builtin_return_address(0),
-                               SOFTIRQ_OFFSET);
-               wakeup_softirqd();
-               __local_bh_enable(SOFTIRQ_OFFSET);
-       }
-}
 #else
-static inline void invoke_softirq(void)
-{
-       if (!force_irqthreads)
                do_softirq();
-       else {
+#endif
+       } else {
                __local_bh_disable((unsigned long)__builtin_return_address(0),
                                SOFTIRQ_OFFSET);
                wakeup_softirqd();
                __local_bh_enable(SOFTIRQ_OFFSET);
        }
 }
-#endif
 
 /*
  * Exit an interrupt context. Process softirqs if needed and possible:
@@ -353,7 +343,7 @@ void irq_exit(void)
                tick_nohz_irq_exit();
 #endif
        rcu_irq_exit();
-       preempt_enable_no_resched();
+       sched_preempt_enable_no_resched();
 }
 
 /*
@@ -385,6 +375,12 @@ void raise_softirq(unsigned int nr)
        local_irq_restore(flags);
 }
 
+void __raise_softirq_irqoff(unsigned int nr)
+{
+       trace_softirq_raise(nr);
+       or_softirq_pending(1UL << nr);
+}
+
 void open_softirq(int nr, void (*action)(struct softirq_action *))
 {
        softirq_vec[nr].action = action;
@@ -744,9 +740,7 @@ static int run_ksoftirqd(void * __bind_cpu)
        while (!kthread_should_stop()) {
                preempt_disable();
                if (!local_softirq_pending()) {
-                       preempt_enable_no_resched();
-                       schedule();
-                       preempt_disable();
+                       schedule_preempt_disabled();
                }
 
                __set_current_state(TASK_RUNNING);
@@ -761,7 +755,7 @@ static int run_ksoftirqd(void * __bind_cpu)
                        if (local_softirq_pending())
                                __do_softirq();
                        local_irq_enable();
-                       preempt_enable_no_resched();
+                       sched_preempt_enable_no_resched();
                        cond_resched();
                        preempt_disable();
                        rcu_note_context_switch((long)__bind_cpu);
index 0febf61..ba35f3a 100644 (file)
@@ -172,6 +172,12 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
 {
        int idx;
 
+       rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
+                          !lock_is_held(&rcu_bh_lock_map) &&
+                          !lock_is_held(&rcu_lock_map) &&
+                          !lock_is_held(&rcu_sched_lock_map),
+                          "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
+
        idx = sp->completed;
        mutex_lock(&sp->mutex);
 
@@ -280,19 +286,26 @@ void synchronize_srcu(struct srcu_struct *sp)
 EXPORT_SYMBOL_GPL(synchronize_srcu);
 
 /**
- * synchronize_srcu_expedited - like synchronize_srcu, but less patient
+ * synchronize_srcu_expedited - Brute-force SRCU grace period
  * @sp: srcu_struct with which to synchronize.
  *
- * Flip the completed counter, and wait for the old count to drain to zero.
- * As with classic RCU, the updater must use some separate means of
- * synchronizing concurrent updates.  Can block; must be called from
- * process context.
+ * Wait for an SRCU grace period to elapse, but use a "big hammer"
+ * approach to force the grace period to end quickly.  This consumes
+ * significant time on all CPUs and is unfriendly to real-time workloads,
+ * and is thus not recommended for any sort of common-case code.  In fact,
+ * if you are using synchronize_srcu_expedited() in a loop, please
+ * restructure your code to batch your updates, and then use a single
+ * synchronize_srcu() instead.
  *
- * Note that it is illegal to call synchronize_srcu_expedited()
- * from the corresponding SRCU read-side critical section; doing so
- * will result in deadlock.  However, it is perfectly legal to call
- * synchronize_srcu_expedited() on one srcu_struct from some other
- * srcu_struct's read-side critical section.
+ * Note that it is illegal to call this function while holding any lock
+ * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
+ * to call this function from a CPU-hotplug notifier.  Failing to observe
+ * these restrictions will result in deadlock.  It is also illegal to call
+ * synchronize_srcu_expedited() from the corresponding SRCU read-side
+ * critical section; doing so will result in deadlock.  However, it is
+ * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct
+ * from some other srcu_struct's read-side critical section, as long as
+ * the resulting graph of srcu_structs is acyclic.
  */
 void synchronize_srcu_expedited(struct srcu_struct *sp)
 {
index 4070153..888d227 100644 (file)
@@ -1706,7 +1706,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
        if (arg4 | arg5)
                return -EINVAL;
 
-       if (!capable(CAP_SYS_ADMIN))
+       if (!capable(CAP_SYS_RESOURCE))
                return -EPERM;
 
        if (addr >= TASK_SIZE)
index f6117a4..6e039b1 100644 (file)
  * NTP timekeeping variables:
  */
 
+DEFINE_SPINLOCK(ntp_lock);
+
+
 /* USER_HZ period (usecs): */
 unsigned long                  tick_usec = TICK_USEC;
 
 /* ACTHZ period (nsecs): */
 unsigned long                  tick_nsec;
 
-u64                            tick_length;
+static u64                     tick_length;
 static u64                     tick_length_base;
 
 static struct hrtimer          leap_timer;
@@ -49,7 +52,7 @@ static struct hrtimer         leap_timer;
 static int                     time_state = TIME_OK;
 
 /* clock status bits:                                                  */
-int                            time_status = STA_UNSYNC;
+static int                     time_status = STA_UNSYNC;
 
 /* TAI offset (secs):                                                  */
 static long                    time_tai;
@@ -133,7 +136,7 @@ static inline void pps_reset_freq_interval(void)
 /**
  * pps_clear - Clears the PPS state variables
  *
- * Must be called while holding a write on the xtime_lock
+ * Must be called while holding the ntp_lock
  */
 static inline void pps_clear(void)
 {
@@ -149,7 +152,7 @@ static inline void pps_clear(void)
  * the last PPS signal. When it reaches 0, indicate that PPS signal is
  * missing.
  *
- * Must be called while holding a write on the xtime_lock
+ * Must be called while holding the ntp_lock
  */
 static inline void pps_dec_valid(void)
 {
@@ -233,6 +236,17 @@ static inline void pps_fill_timex(struct timex *txc)
 
 #endif /* CONFIG_NTP_PPS */
 
+
+/**
+ * ntp_synced - Returns 1 if the NTP status is not UNSYNC
+ *
+ */
+static inline int ntp_synced(void)
+{
+       return !(time_status & STA_UNSYNC);
+}
+
+
 /*
  * NTP methods:
  */
@@ -275,7 +289,7 @@ static inline s64 ntp_update_offset_fll(s64 offset64, long secs)
 
        time_status |= STA_MODE;
 
-       return div_s64(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs);
+       return div64_long(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs);
 }
 
 static void ntp_update_offset(long offset)
@@ -330,11 +344,13 @@ static void ntp_update_offset(long offset)
 
 /**
  * ntp_clear - Clears the NTP state variables
- *
- * Must be called while holding a write on the xtime_lock
  */
 void ntp_clear(void)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&ntp_lock, flags);
+
        time_adjust     = 0;            /* stop active adjtime() */
        time_status     |= STA_UNSYNC;
        time_maxerror   = NTP_PHASE_LIMIT;
@@ -347,8 +363,23 @@ void ntp_clear(void)
 
        /* Clear PPS state variables */
        pps_clear();
+       spin_unlock_irqrestore(&ntp_lock, flags);
+
 }
 
+
+u64 ntp_tick_length(void)
+{
+       unsigned long flags;
+       s64 ret;
+
+       spin_lock_irqsave(&ntp_lock, flags);
+       ret = tick_length;
+       spin_unlock_irqrestore(&ntp_lock, flags);
+       return ret;
+}
+
+
 /*
  * Leap second processing. If in leap-insert state at the end of the
  * day, the system clock is set back one second; if in leap-delete
@@ -357,14 +388,15 @@ void ntp_clear(void)
 static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
 {
        enum hrtimer_restart res = HRTIMER_NORESTART;
+       unsigned long flags;
+       int leap = 0;
 
-       write_seqlock(&xtime_lock);
-
+       spin_lock_irqsave(&ntp_lock, flags);
        switch (time_state) {
        case TIME_OK:
                break;
        case TIME_INS:
-               timekeeping_leap_insert(-1);
+               leap = -1;
                time_state = TIME_OOP;
                printk(KERN_NOTICE
                        "Clock: inserting leap second 23:59:60 UTC\n");
@@ -372,7 +404,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
                res = HRTIMER_RESTART;
                break;
        case TIME_DEL:
-               timekeeping_leap_insert(1);
+               leap = 1;
                time_tai--;
                time_state = TIME_WAIT;
                printk(KERN_NOTICE
@@ -387,8 +419,14 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
                        time_state = TIME_OK;
                break;
        }
+       spin_unlock_irqrestore(&ntp_lock, flags);
 
-       write_sequnlock(&xtime_lock);
+       /*
+        * We have to call this outside of the ntp_lock to keep
+        * the proper locking hierarchy
+        */
+       if (leap)
+               timekeeping_leap_insert(leap);
 
        return res;
 }
@@ -404,6 +442,9 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
 void second_overflow(void)
 {
        s64 delta;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ntp_lock, flags);
 
        /* Bump the maxerror field */
        time_maxerror += MAXFREQ / NSEC_PER_USEC;
@@ -423,23 +464,25 @@ void second_overflow(void)
        pps_dec_valid();
 
        if (!time_adjust)
-               return;
+               goto out;
 
        if (time_adjust > MAX_TICKADJ) {
                time_adjust -= MAX_TICKADJ;
                tick_length += MAX_TICKADJ_SCALED;
-               return;
+               goto out;
        }
 
        if (time_adjust < -MAX_TICKADJ) {
                time_adjust += MAX_TICKADJ;
                tick_length -= MAX_TICKADJ_SCALED;
-               return;
+               goto out;
        }
 
        tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
                                                         << NTP_SCALE_SHIFT;
        time_adjust = 0;
+out:
+       spin_unlock_irqrestore(&ntp_lock, flags);
 }
 
 #ifdef CONFIG_GENERIC_CMOS_UPDATE
@@ -663,7 +706,7 @@ int do_adjtimex(struct timex *txc)
 
        getnstimeofday(&ts);
 
-       write_seqlock_irq(&xtime_lock);
+       spin_lock_irq(&ntp_lock);
 
        if (txc->modes & ADJ_ADJTIME) {
                long save_adjust = time_adjust;
@@ -705,7 +748,7 @@ int do_adjtimex(struct timex *txc)
        /* fill PPS status fields */
        pps_fill_timex(txc);
 
-       write_sequnlock_irq(&xtime_lock);
+       spin_unlock_irq(&ntp_lock);
 
        txc->time.tv_sec = ts.tv_sec;
        txc->time.tv_usec = ts.tv_nsec;
@@ -903,7 +946,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 
        pts_norm = pps_normalize_ts(*phase_ts);
 
-       write_seqlock_irqsave(&xtime_lock, flags);
+       spin_lock_irqsave(&ntp_lock, flags);
 
        /* clear the error bits, they will be set again if needed */
        time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
@@ -916,7 +959,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
         * just start the frequency interval */
        if (unlikely(pps_fbase.tv_sec == 0)) {
                pps_fbase = *raw_ts;
-               write_sequnlock_irqrestore(&xtime_lock, flags);
+               spin_unlock_irqrestore(&ntp_lock, flags);
                return;
        }
 
@@ -931,7 +974,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
                time_status |= STA_PPSJITTER;
                /* restart the frequency calibration interval */
                pps_fbase = *raw_ts;
-               write_sequnlock_irqrestore(&xtime_lock, flags);
+               spin_unlock_irqrestore(&ntp_lock, flags);
                pr_err("hardpps: PPSJITTER: bad pulse\n");
                return;
        }
@@ -948,7 +991,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 
        hardpps_update_phase(pts_norm.nsec);
 
-       write_sequnlock_irqrestore(&xtime_lock, flags);
+       spin_unlock_irqrestore(&ntp_lock, flags);
 }
 EXPORT_SYMBOL(hardpps);
 
index fd4a7b1..e883f57 100644 (file)
@@ -575,11 +575,15 @@ void tick_broadcast_switch_to_oneshot(void)
        unsigned long flags;
 
        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
+       if (cpumask_empty(tick_get_broadcast_mask()))
+               goto end;
 
        tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
        bc = tick_broadcast_device.evtdev;
        if (bc)
                tick_broadcast_setup_oneshot(bc);
+
+end:
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
index 7656642..3526038 100644 (file)
@@ -182,11 +182,7 @@ static void tick_nohz_stop_idle(int cpu, ktime_t now)
 
 static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
 {
-       ktime_t now;
-
-       now = ktime_get();
-
-       update_ts_time_stats(cpu, ts, now, NULL);
+       ktime_t now = ktime_get();
 
        ts->idle_entrytime = now;
        ts->idle_active = 1;
@@ -562,20 +558,21 @@ void tick_nohz_idle_exit(void)
 
        local_irq_disable();
 
-       if (ts->idle_active || (ts->inidle && ts->tick_stopped))
+       WARN_ON_ONCE(!ts->inidle);
+
+       ts->inidle = 0;
+
+       if (ts->idle_active || ts->tick_stopped)
                now = ktime_get();
 
        if (ts->idle_active)
                tick_nohz_stop_idle(cpu, now);
 
-       if (!ts->inidle || !ts->tick_stopped) {
-               ts->inidle = 0;
+       if (!ts->tick_stopped) {
                local_irq_enable();
                return;
        }
 
-       ts->inidle = 0;
-
        /* Update jiffies first */
        select_nohz_load_balancer(0);
        tick_do_update_jiffies64(now);
index 0c63581..403c2a0 100644 (file)
@@ -25,6 +25,8 @@
 struct timekeeper {
        /* Current clocksource used for timekeeping. */
        struct clocksource *clock;
+       /* NTP adjusted clock multiplier */
+       u32     mult;
        /* The shift value of the current clocksource. */
        int     shift;
 
@@ -45,12 +47,47 @@ struct timekeeper {
        /* Shift conversion between clock shifted nano seconds and
         * ntp shifted nano seconds. */
        int     ntp_error_shift;
-       /* NTP adjusted clock multiplier */
-       u32     mult;
+
+       /* The current time */
+       struct timespec xtime;
+       /*
+        * wall_to_monotonic is what we need to add to xtime (or xtime corrected
+        * for sub-jiffy times) to get to monotonic time.  Monotonic is pegged
+        * at zero at system boot time, so wall_to_monotonic will be negative,
+        * however, we will ALWAYS keep the tv_nsec part positive so we can use
+        * the usual normalization.
+        *
+        * wall_to_monotonic is moved after resume from suspend for the
+        * monotonic time not to jump. We need to add total_sleep_time to
+        * wall_to_monotonic to get the real boot based time offset.
+        *
+        * - wall_to_monotonic is no longer the boot time, getboottime must be
+        * used instead.
+        */
+       struct timespec wall_to_monotonic;
+       /* time spent in suspend */
+       struct timespec total_sleep_time;
+       /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
+       struct timespec raw_time;
+
+       /* Seqlock for all timekeeper values */
+       seqlock_t lock;
 };
 
 static struct timekeeper timekeeper;
 
+/*
+ * This seqlock protects us from races in SMP while
+ * playing with xtime.
+ */
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
+
+
+/* Flag indicating whether timekeeping is suspended */
+int __read_mostly timekeeping_suspended;
+
+
+
 /**
  * timekeeper_setup_internals - Set up internals to use clocksource clock.
  *
@@ -135,47 +172,28 @@ static inline s64 timekeeping_get_ns_raw(void)
        return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
 }
 
-/*
- * This read-write spinlock protects us from races in SMP while
- * playing with xtime.
- */
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
-
-
-/*
- * The current time
- * wall_to_monotonic is what we need to add to xtime (or xtime corrected
- * for sub jiffie times) to get to monotonic time.  Monotonic is pegged
- * at zero at system boot time, so wall_to_monotonic will be negative,
- * however, we will ALWAYS keep the tv_nsec part positive so we can use
- * the usual normalization.
- *
- * wall_to_monotonic is moved after resume from suspend for the monotonic
- * time not to jump. We need to add total_sleep_time to wall_to_monotonic
- * to get the real boot based time offset.
- *
- * - wall_to_monotonic is no longer the boot time, getboottime must be
- * used instead.
- */
-static struct timespec xtime __attribute__ ((aligned (16)));
-static struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
-static struct timespec total_sleep_time;
-
-/*
- * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
- */
-static struct timespec raw_time;
+/* must hold write on timekeeper.lock */
+static void timekeeping_update(bool clearntp)
+{
+       if (clearntp) {
+               timekeeper.ntp_error = 0;
+               ntp_clear();
+       }
+       update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic,
+                        timekeeper.clock, timekeeper.mult);
+}
 
-/* flag for if timekeeping is suspended */
-int __read_mostly timekeeping_suspended;
 
-/* must hold xtime_lock */
 void timekeeping_leap_insert(int leapsecond)
 {
-       xtime.tv_sec += leapsecond;
-       wall_to_monotonic.tv_sec -= leapsecond;
-       update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
-                       timekeeper.mult);
+       unsigned long flags;
+
+       write_seqlock_irqsave(&timekeeper.lock, flags);
+       timekeeper.xtime.tv_sec += leapsecond;
+       timekeeper.wall_to_monotonic.tv_sec -= leapsecond;
+       timekeeping_update(false);
+       write_sequnlock_irqrestore(&timekeeper.lock, flags);
+
 }
 
 /**
@@ -202,10 +220,10 @@ static void timekeeping_forward_now(void)
        /* If arch requires, add in gettimeoffset() */
        nsec += arch_gettimeoffset();
 
-       timespec_add_ns(&xtime, nsec);
+       timespec_add_ns(&timekeeper.xtime, nsec);
 
        nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
-       timespec_add_ns(&raw_time, nsec);
+       timespec_add_ns(&timekeeper.raw_time, nsec);
 }
 
 /**
@@ -222,15 +240,15 @@ void getnstimeofday(struct timespec *ts)
        WARN_ON(timekeeping_suspended);
 
        do {
-               seq = read_seqbegin(&xtime_lock);
+               seq = read_seqbegin(&timekeeper.lock);
 
-               *ts = xtime;
+               *ts = timekeeper.xtime;
                nsecs = timekeeping_get_ns();
 
                /* If arch requires, add in gettimeoffset() */
                nsecs += arch_gettimeoffset();
 
-       } while (read_seqretry(&xtime_lock, seq));
+       } while (read_seqretry(&timekeeper.lock, seq));
 
        timespec_add_ns(ts, nsecs);
 }
@@ -245,14 +263,16 @@ ktime_t ktime_get(void)
        WARN_ON(timekeeping_suspended);
 
        do {
-               seq = read_seqbegin(&xtime_lock);
-               secs = xtime.tv_sec + wall_to_monotonic.tv_sec;
-               nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec;
+               seq = read_seqbegin(&timekeeper.lock);
+               secs = timekeeper.xtime.tv_sec +
+                               timekeeper.wall_to_monotonic.tv_sec;
+               nsecs = timekeeper.xtime.tv_nsec +
+                               timekeeper.wall_to_monotonic.tv_nsec;
                nsecs += timekeeping_get_ns();
                /* If arch requires, add in gettimeoffset() */
                nsecs += arch_gettimeoffset();
 
-       } while (read_seqretry(&xtime_lock, seq));
+       } while (read_seqretry(&timekeeper.lock, seq));
        /*
         * Use ktime_set/ktime_add_ns to create a proper ktime on
         * 32-bit architectures without CONFIG_KTIME_SCALAR.
@@ -278,14 +298,14 @@ void ktime_get_ts(struct timespec *ts)
        WARN_ON(timekeeping_suspended);
 
        do {
-               seq = read_seqbegin(&xtime_lock);
-               *ts = xtime;
-               tomono = wall_to_monotonic;
+               seq = read_seqbegin(&timekeeper.lock);
+               *ts = timekeeper.xtime;
+               tomono = timekeeper.wall_to_monotonic;
                nsecs = timekeeping_get_ns();
                /* If arch requires, add in gettimeoffset() */
                nsecs += arch_gettimeoffset();
 
-       } while (read_seqretry(&xtime_lock, seq));
+       } while (read_seqretry(&timekeeper.lock, seq));
 
        set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
                                ts->tv_nsec + tomono.tv_nsec + nsecs);
@@ -313,10 +333,10 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
        do {
                u32 arch_offset;
 
-               seq = read_seqbegin(&xtime_lock);
+               seq = read_seqbegin(&timekeeper.lock);
 
-               *ts_raw = raw_time;
-               *ts_real = xtime;
+               *ts_raw = timekeeper.raw_time;
+               *ts_real = timekeeper.xtime;
 
                nsecs_raw = timekeeping_get_ns_raw();
                nsecs_real = timekeeping_get_ns();
@@ -326,7 +346,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
                nsecs_raw += arch_offset;
                nsecs_real += arch_offset;
 
-       } while (read_seqretry(&xtime_lock, seq));
+       } while (read_seqretry(&timekeeper.lock, seq));
 
        timespec_add_ns(ts_raw, nsecs_raw);
        timespec_add_ns(ts_real, nsecs_real);
@@ -365,23 +385,19 @@ int do_settimeofday(const struct timespec *tv)
        if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;
 
-       write_seqlock_irqsave(&xtime_lock, flags);
+       write_seqlock_irqsave(&timekeeper.lock, flags);
 
        timekeeping_forward_now();
 
-       ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec;
-       ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec;
-       wall_to_monotonic = timespec_sub(wall_to_monotonic, ts_delta);
-
-       xtime = *tv;
+       ts_delta.tv_sec = tv->tv_sec - timekeeper.xtime.tv_sec;
+       ts_delta.tv_nsec = tv->tv_nsec - timekeeper.xtime.tv_nsec;
+       timekeeper.wall_to_monotonic =
+                       timespec_sub(timekeeper.wall_to_monotonic, ts_delta);
 
-       timekeeper.ntp_error = 0;
-       ntp_clear();
-
-       update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
-                               timekeeper.mult);
+       timekeeper.xtime = *tv;
+       timekeeping_update(true);
 
-       write_sequnlock_irqrestore(&xtime_lock, flags);
+       write_sequnlock_irqrestore(&timekeeper.lock, flags);
 
        /* signal hrtimers about time change */
        clock_was_set();
@@ -405,20 +421,17 @@ int timekeeping_inject_offset(struct timespec *ts)
        if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;
 
-       write_seqlock_irqsave(&xtime_lock, flags);
+       write_seqlock_irqsave(&timekeeper.lock, flags);
 
        timekeeping_forward_now();
 
-       xtime = timespec_add(xtime, *ts);
-       wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
-
-       timekeeper.ntp_error = 0;
-       ntp_clear();
+       timekeeper.xtime = timespec_add(timekeeper.xtime, *ts);
+       timekeeper.wall_to_monotonic =
+                               timespec_sub(timekeeper.wall_to_monotonic, *ts);
 
-       update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
-                               timekeeper.mult);
+       timekeeping_update(true);
 
-       write_sequnlock_irqrestore(&xtime_lock, flags);
+       write_sequnlock_irqrestore(&timekeeper.lock, flags);
 
        /* signal hrtimers about time change */
        clock_was_set();
@@ -490,11 +503,11 @@ void getrawmonotonic(struct timespec *ts)
        s64 nsecs;
 
        do {
-               seq = read_seqbegin(&xtime_lock);
+               seq = read_seqbegin(&timekeeper.lock);
                nsecs = timekeeping_get_ns_raw();
-               *ts = raw_time;
+               *ts = timekeeper.raw_time;
 
-       } while (read_seqretry(&xtime_lock, seq));
+       } while (read_seqretry(&timekeeper.lock, seq));
 
        timespec_add_ns(ts, nsecs);
 }
@@ -510,24 +523,30 @@ int timekeeping_valid_for_hres(void)
        int ret;
 
        do {
-               seq = read_seqbegin(&xtime_lock);
+               seq = read_seqbegin(&timekeeper.lock);
 
                ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
 
-       } while (read_seqretry(&xtime_lock, seq));
+       } while (read_seqretry(&timekeeper.lock, seq));
 
        return ret;
 }
 
 /**
  * timekeeping_max_deferment - Returns max time the clocksource can be deferred
- *
- * Caller must observe xtime_lock via read_seqbegin/read_seqretry to
- * ensure that the clocksource does not change!
  */
 u64 timekeeping_max_deferment(void)
 {
-       return timekeeper.clock->max_idle_ns;
+       unsigned long seq;
+       u64 ret;
+       do {
+               seq = read_seqbegin(&timekeeper.lock);
+
+               ret = timekeeper.clock->max_idle_ns;
+
+       } while (read_seqretry(&timekeeper.lock, seq));
+
+       return ret;
 }
 
 /**
@@ -572,28 +591,29 @@ void __init timekeeping_init(void)
        read_persistent_clock(&now);
        read_boot_clock(&boot);
 
-       write_seqlock_irqsave(&xtime_lock, flags);
+       seqlock_init(&timekeeper.lock);
 
        ntp_init();
 
+       write_seqlock_irqsave(&timekeeper.lock, flags);
        clock = clocksource_default_clock();
        if (clock->enable)
                clock->enable(clock);
        timekeeper_setup_internals(clock);
 
-       xtime.tv_sec = now.tv_sec;
-       xtime.tv_nsec = now.tv_nsec;
-       raw_time.tv_sec = 0;
-       raw_time.tv_nsec = 0;
+       timekeeper.xtime.tv_sec = now.tv_sec;
+       timekeeper.xtime.tv_nsec = now.tv_nsec;
+       timekeeper.raw_time.tv_sec = 0;
+       timekeeper.raw_time.tv_nsec = 0;
        if (boot.tv_sec == 0 && boot.tv_nsec == 0) {
-               boot.tv_sec = xtime.tv_sec;
-               boot.tv_nsec = xtime.tv_nsec;
+               boot.tv_sec = timekeeper.xtime.tv_sec;
+               boot.tv_nsec = timekeeper.xtime.tv_nsec;
        }
-       set_normalized_timespec(&wall_to_monotonic,
+       set_normalized_timespec(&timekeeper.wall_to_monotonic,
                                -boot.tv_sec, -boot.tv_nsec);
-       total_sleep_time.tv_sec = 0;
-       total_sleep_time.tv_nsec = 0;
-       write_sequnlock_irqrestore(&xtime_lock, flags);
+       timekeeper.total_sleep_time.tv_sec = 0;
+       timekeeper.total_sleep_time.tv_nsec = 0;
+       write_sequnlock_irqrestore(&timekeeper.lock, flags);
 }
 
 /* time in seconds when suspend began */
@@ -614,9 +634,11 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta)
                return;
        }
 
-       xtime = timespec_add(xtime, *delta);
-       wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta);
-       total_sleep_time = timespec_add(total_sleep_time, *delta);
+       timekeeper.xtime = timespec_add(timekeeper.xtime, *delta);
+       timekeeper.wall_to_monotonic =
+                       timespec_sub(timekeeper.wall_to_monotonic, *delta);
+       timekeeper.total_sleep_time = timespec_add(
+                                       timekeeper.total_sleep_time, *delta);
 }
 
 
@@ -640,17 +662,15 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
        if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
                return;
 
-       write_seqlock_irqsave(&xtime_lock, flags);
+       write_seqlock_irqsave(&timekeeper.lock, flags);
+
        timekeeping_forward_now();
 
        __timekeeping_inject_sleeptime(delta);
 
-       timekeeper.ntp_error = 0;
-       ntp_clear();
-       update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
-                               timekeeper.mult);
+       timekeeping_update(true);
 
-       write_sequnlock_irqrestore(&xtime_lock, flags);
+       write_sequnlock_irqrestore(&timekeeper.lock, flags);
 
        /* signal hrtimers about time change */
        clock_was_set();
@@ -673,7 +693,7 @@ static void timekeeping_resume(void)
 
        clocksource_resume();
 
-       write_seqlock_irqsave(&xtime_lock, flags);
+       write_seqlock_irqsave(&timekeeper.lock, flags);
 
        if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
                ts = timespec_sub(ts, timekeeping_suspend_time);
@@ -683,7 +703,7 @@ static void timekeeping_resume(void)
        timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
        timekeeper.ntp_error = 0;
        timekeeping_suspended = 0;
-       write_sequnlock_irqrestore(&xtime_lock, flags);
+       write_sequnlock_irqrestore(&timekeeper.lock, flags);
 
        touch_softlockup_watchdog();
 
@@ -701,7 +721,7 @@ static int timekeeping_suspend(void)
 
        read_persistent_clock(&timekeeping_suspend_time);
 
-       write_seqlock_irqsave(&xtime_lock, flags);
+       write_seqlock_irqsave(&timekeeper.lock, flags);
        timekeeping_forward_now();
        timekeeping_suspended = 1;
 
@@ -711,7 +731,7 @@ static int timekeeping_suspend(void)
         * try to compensate so the difference in system time
         * and persistent_clock time stays close to constant.
         */
-       delta = timespec_sub(xtime, timekeeping_suspend_time);
+       delta = timespec_sub(timekeeper.xtime, timekeeping_suspend_time);
        delta_delta = timespec_sub(delta, old_delta);
        if (abs(delta_delta.tv_sec)  >= 2) {
                /*
@@ -724,7 +744,7 @@ static int timekeeping_suspend(void)
                timekeeping_suspend_time =
                        timespec_add(timekeeping_suspend_time, delta_delta);
        }
-       write_sequnlock_irqrestore(&xtime_lock, flags);
+       write_sequnlock_irqrestore(&timekeeper.lock, flags);
 
        clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
        clocksource_suspend();
@@ -775,7 +795,7 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval,
         * Now calculate the error in (1 << look_ahead) ticks, but first
         * remove the single look ahead already included in the error.
         */
-       tick_error = tick_length >> (timekeeper.ntp_error_shift + 1);
+       tick_error = ntp_tick_length() >> (timekeeper.ntp_error_shift + 1);
        tick_error -= timekeeper.xtime_interval >> 1;
        error = ((error - tick_error) >> look_ahead) + tick_error;
 
@@ -943,22 +963,22 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
        timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
        while (timekeeper.xtime_nsec >= nsecps) {
                timekeeper.xtime_nsec -= nsecps;
-               xtime.tv_sec++;
+               timekeeper.xtime.tv_sec++;
                second_overflow();
        }
 
        /* Accumulate raw time */
        raw_nsecs = timekeeper.raw_interval << shift;
-       raw_nsecs += raw_time.tv_nsec;
+       raw_nsecs += timekeeper.raw_time.tv_nsec;
        if (raw_nsecs >= NSEC_PER_SEC) {
                u64 raw_secs = raw_nsecs;
                raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
-               raw_time.tv_sec += raw_secs;
+               timekeeper.raw_time.tv_sec += raw_secs;
        }
-       raw_time.tv_nsec = raw_nsecs;
+       timekeeper.raw_time.tv_nsec = raw_nsecs;
 
        /* Accumulate error between NTP and clock interval */
-       timekeeper.ntp_error += tick_length << shift;
+       timekeeper.ntp_error += ntp_tick_length() << shift;
        timekeeper.ntp_error -=
            (timekeeper.xtime_interval + timekeeper.xtime_remainder) <<
                                (timekeeper.ntp_error_shift + shift);
@@ -970,17 +990,19 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
 /**
  * update_wall_time - Uses the current clocksource to increment the wall time
  *
- * Called from the timer interrupt, must hold a write on xtime_lock.
  */
 static void update_wall_time(void)
 {
        struct clocksource *clock;
        cycle_t offset;
        int shift = 0, maxshift;
+       unsigned long flags;
+
+       write_seqlock_irqsave(&timekeeper.lock, flags);
 
        /* Make sure we're fully resumed: */
        if (unlikely(timekeeping_suspended))
-               return;
+               goto out;
 
        clock = timekeeper.clock;
 
@@ -989,7 +1011,8 @@ static void update_wall_time(void)
 #else
        offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
 #endif
-       timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
+       timekeeper.xtime_nsec = (s64)timekeeper.xtime.tv_nsec <<
+                                               timekeeper.shift;
 
        /*
         * With NO_HZ we may have to accumulate many cycle_intervals
@@ -1002,7 +1025,7 @@ static void update_wall_time(void)
        shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
        shift = max(0, shift);
        /* Bound shift to one less than what overflows tick_length */
-       maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1;
+       maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
        shift = min(shift, maxshift);
        while (offset >= timekeeper.cycle_interval) {
                offset = logarithmic_accumulation(offset, shift);
@@ -1040,8 +1063,10 @@ static void update_wall_time(void)
         * Store full nanoseconds into xtime after rounding it up and
         * add the remainder to the error difference.
         */
-       xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
-       timekeeper.xtime_nsec -= (s64) xtime.tv_nsec << timekeeper.shift;
+       timekeeper.xtime.tv_nsec = ((s64)timekeeper.xtime_nsec >>
+                                               timekeeper.shift) + 1;
+       timekeeper.xtime_nsec -= (s64)timekeeper.xtime.tv_nsec <<
+                                               timekeeper.shift;
        timekeeper.ntp_error += timekeeper.xtime_nsec <<
                                timekeeper.ntp_error_shift;
 
@@ -1049,15 +1074,17 @@ static void update_wall_time(void)
         * Finally, make sure that after the rounding
         * xtime.tv_nsec isn't larger than NSEC_PER_SEC
         */
-       if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
-               xtime.tv_nsec -= NSEC_PER_SEC;
-               xtime.tv_sec++;
+       if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) {
+               timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
+               timekeeper.xtime.tv_sec++;
                second_overflow();
        }
 
-       /* check to see if there is a new clocksource to use */
-       update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
-                               timekeeper.mult);
+       timekeeping_update(false);
+
+out:
+       write_sequnlock_irqrestore(&timekeeper.lock, flags);
+
 }
 
 /**
@@ -1074,8 +1101,10 @@ static void update_wall_time(void)
 void getboottime(struct timespec *ts)
 {
        struct timespec boottime = {
-               .tv_sec = wall_to_monotonic.tv_sec + total_sleep_time.tv_sec,
-               .tv_nsec = wall_to_monotonic.tv_nsec + total_sleep_time.tv_nsec
+               .tv_sec = timekeeper.wall_to_monotonic.tv_sec +
+                               timekeeper.total_sleep_time.tv_sec,
+               .tv_nsec = timekeeper.wall_to_monotonic.tv_nsec +
+                               timekeeper.total_sleep_time.tv_nsec
        };
 
        set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
@@ -1101,13 +1130,13 @@ void get_monotonic_boottime(struct timespec *ts)
        WARN_ON(timekeeping_suspended);
 
        do {
-               seq = read_seqbegin(&xtime_lock);
-               *ts = xtime;
-               tomono = wall_to_monotonic;
-               sleep = total_sleep_time;
+               seq = read_seqbegin(&timekeeper.lock);
+               *ts = timekeeper.xtime;
+               tomono = timekeeper.wall_to_monotonic;
+               sleep = timekeeper.total_sleep_time;
                nsecs = timekeeping_get_ns();
 
-       } while (read_seqretry(&xtime_lock, seq));
+       } while (read_seqretry(&timekeeper.lock, seq));
 
        set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec,
                        ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs);
@@ -1137,19 +1166,19 @@ EXPORT_SYMBOL_GPL(ktime_get_boottime);
  */
 void monotonic_to_bootbased(struct timespec *ts)
 {
-       *ts = timespec_add(*ts, total_sleep_time);
+       *ts = timespec_add(*ts, timekeeper.total_sleep_time);
 }
 EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
 
 unsigned long get_seconds(void)
 {
-       return xtime.tv_sec;
+       return timekeeper.xtime.tv_sec;
 }
 EXPORT_SYMBOL(get_seconds);
 
 struct timespec __current_kernel_time(void)
 {
-       return xtime;
+       return timekeeper.xtime;
 }
 
 struct timespec current_kernel_time(void)
@@ -1158,10 +1187,10 @@ struct timespec current_kernel_time(void)
        unsigned long seq;
 
        do {
-               seq = read_seqbegin(&xtime_lock);
+               seq = read_seqbegin(&timekeeper.lock);
 
-               now = xtime;
-       } while (read_seqretry(&xtime_lock, seq));
+               now = timekeeper.xtime;
+       } while (read_seqretry(&timekeeper.lock, seq));
 
        return now;
 }
@@ -1173,11 +1202,11 @@ struct timespec get_monotonic_coarse(void)
        unsigned long seq;
 
        do {
-               seq = read_seqbegin(&xtime_lock);
+               seq = read_seqbegin(&timekeeper.lock);
 
-               now = xtime;
-               mono = wall_to_monotonic;
-       } while (read_seqretry(&xtime_lock, seq));
+               now = timekeeper.xtime;
+               mono = timekeeper.wall_to_monotonic;
+       } while (read_seqretry(&timekeeper.lock, seq));
 
        set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
                                now.tv_nsec + mono.tv_nsec);
@@ -1209,11 +1238,11 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
        unsigned long seq;
 
        do {
-               seq = read_seqbegin(&xtime_lock);
-               *xtim = xtime;
-               *wtom = wall_to_monotonic;
-               *sleep = total_sleep_time;
-       } while (read_seqretry(&xtime_lock, seq));
+               seq = read_seqbegin(&timekeeper.lock);
+               *xtim = timekeeper.xtime;
+               *wtom = timekeeper.wall_to_monotonic;
+               *sleep = timekeeper.total_sleep_time;
+       } while (read_seqretry(&timekeeper.lock, seq));
 }
 
 /**
@@ -1225,9 +1254,10 @@ ktime_t ktime_get_monotonic_offset(void)
        struct timespec wtom;
 
        do {
-               seq = read_seqbegin(&xtime_lock);
-               wtom = wall_to_monotonic;
-       } while (read_seqretry(&xtime_lock, seq));
+               seq = read_seqbegin(&timekeeper.lock);
+               wtom = timekeeper.wall_to_monotonic;
+       } while (read_seqretry(&timekeeper.lock, seq));
+
        return timespec_to_ktime(wtom);
 }
 
index 683d559..867bd1d 100644 (file)
@@ -62,6 +62,8 @@
 #define FTRACE_HASH_DEFAULT_BITS 10
 #define FTRACE_HASH_MAX_BITS 12
 
+#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
+
 /* ftrace_enabled is a method to turn ftrace on or off */
 int ftrace_enabled __read_mostly;
 static int last_ftrace_enabled;
@@ -89,12 +91,14 @@ static struct ftrace_ops ftrace_list_end __read_mostly = {
 };
 
 static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
+static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
 ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 static struct ftrace_ops global_ops;
+static struct ftrace_ops control_ops;
 
 static void
 ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
@@ -168,6 +172,32 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
 }
 #endif
 
+static void control_ops_disable_all(struct ftrace_ops *ops)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               *per_cpu_ptr(ops->disabled, cpu) = 1;
+}
+
+static int control_ops_alloc(struct ftrace_ops *ops)
+{
+       int __percpu *disabled;
+
+       disabled = alloc_percpu(int);
+       if (!disabled)
+               return -ENOMEM;
+
+       ops->disabled = disabled;
+       control_ops_disable_all(ops);
+       return 0;
+}
+
+static void control_ops_free(struct ftrace_ops *ops)
+{
+       free_percpu(ops->disabled);
+}
+
 static void update_global_ops(void)
 {
        ftrace_func_t func;
@@ -259,6 +289,26 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
        return 0;
 }
 
+static void add_ftrace_list_ops(struct ftrace_ops **list,
+                               struct ftrace_ops *main_ops,
+                               struct ftrace_ops *ops)
+{
+       int first = *list == &ftrace_list_end;
+       add_ftrace_ops(list, ops);
+       if (first)
+               add_ftrace_ops(&ftrace_ops_list, main_ops);
+}
+
+static int remove_ftrace_list_ops(struct ftrace_ops **list,
+                                 struct ftrace_ops *main_ops,
+                                 struct ftrace_ops *ops)
+{
+       int ret = remove_ftrace_ops(list, ops);
+       if (!ret && *list == &ftrace_list_end)
+               ret = remove_ftrace_ops(&ftrace_ops_list, main_ops);
+       return ret;
+}
+
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
        if (ftrace_disabled)
@@ -270,15 +320,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
        if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
                return -EBUSY;
 
+       /* We don't support both control and global flags set. */
+       if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
+               return -EINVAL;
+
        if (!core_kernel_data((unsigned long)ops))
                ops->flags |= FTRACE_OPS_FL_DYNAMIC;
 
        if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-               int first = ftrace_global_list == &ftrace_list_end;
-               add_ftrace_ops(&ftrace_global_list, ops);
+               add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
                ops->flags |= FTRACE_OPS_FL_ENABLED;
-               if (first)
-                       add_ftrace_ops(&ftrace_ops_list, &global_ops);
+       } else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+               if (control_ops_alloc(ops))
+                       return -ENOMEM;
+               add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
        } else
                add_ftrace_ops(&ftrace_ops_list, ops);
 
@@ -302,11 +357,23 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
                return -EINVAL;
 
        if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-               ret = remove_ftrace_ops(&ftrace_global_list, ops);
-               if (!ret && ftrace_global_list == &ftrace_list_end)
-                       ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops);
+               ret = remove_ftrace_list_ops(&ftrace_global_list,
+                                            &global_ops, ops);
                if (!ret)
                        ops->flags &= ~FTRACE_OPS_FL_ENABLED;
+       } else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+               ret = remove_ftrace_list_ops(&ftrace_control_list,
+                                            &control_ops, ops);
+               if (!ret) {
+                       /*
+                        * The ftrace_ops is now removed from the list,
+                        * so there'll be no new users. We must ensure
+                        * all current users are done before we free
+                        * the control data.
+                        */
+                       synchronize_sched();
+                       control_ops_free(ops);
+               }
        } else
                ret = remove_ftrace_ops(&ftrace_ops_list, ops);
 
@@ -1119,6 +1186,12 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
        call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu);
 }
 
+void ftrace_free_filter(struct ftrace_ops *ops)
+{
+       free_ftrace_hash(ops->filter_hash);
+       free_ftrace_hash(ops->notrace_hash);
+}
+
 static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
 {
        struct ftrace_hash *hash;
@@ -1129,7 +1202,7 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
                return NULL;
 
        size = 1 << size_bits;
-       hash->buckets = kzalloc(sizeof(*hash->buckets) * size, GFP_KERNEL);
+       hash->buckets = kcalloc(size, sizeof(*hash->buckets), GFP_KERNEL);
 
        if (!hash->buckets) {
                kfree(hash);
@@ -3146,8 +3219,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
        mutex_lock(&ftrace_regex_lock);
        if (reset)
                ftrace_filter_reset(hash);
-       if (buf)
-               ftrace_match_records(hash, buf, len);
+       if (buf && !ftrace_match_records(hash, buf, len)) {
+               ret = -EINVAL;
+               goto out_regex_unlock;
+       }
 
        mutex_lock(&ftrace_lock);
        ret = ftrace_hash_move(ops, enable, orig_hash, hash);
@@ -3157,6 +3232,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
 
        mutex_unlock(&ftrace_lock);
 
+ out_regex_unlock:
        mutex_unlock(&ftrace_regex_lock);
 
        free_ftrace_hash(hash);
@@ -3173,10 +3249,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
  * Filters denote which functions should be enabled when tracing is enabled.
  * If @buf is NULL and reset is set, all functions will be enabled for tracing.
  */
-void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
+int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
                       int len, int reset)
 {
-       ftrace_set_regex(ops, buf, len, reset, 1);
+       return ftrace_set_regex(ops, buf, len, reset, 1);
 }
 EXPORT_SYMBOL_GPL(ftrace_set_filter);
 
@@ -3191,10 +3267,10 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter);
  * is enabled. If @buf is NULL and reset is set, all functions will be enabled
  * for tracing.
  */
-void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
+int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
                        int len, int reset)
 {
-       ftrace_set_regex(ops, buf, len, reset, 0);
+       return ftrace_set_regex(ops, buf, len, reset, 0);
 }
 EXPORT_SYMBOL_GPL(ftrace_set_notrace);
 /**
@@ -3871,6 +3947,36 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 static void
+ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
+{
+       struct ftrace_ops *op;
+
+       if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT)))
+               return;
+
+       /*
+        * Some of the ops may be dynamically allocated,
+        * they must be freed after a synchronize_sched().
+        */
+       preempt_disable_notrace();
+       trace_recursion_set(TRACE_CONTROL_BIT);
+       op = rcu_dereference_raw(ftrace_control_list);
+       while (op != &ftrace_list_end) {
+               if (!ftrace_function_local_disabled(op) &&
+                   ftrace_ops_test(op, ip))
+                       op->func(ip, parent_ip);
+
+               op = rcu_dereference_raw(op->next);
+       };
+       trace_recursion_clear(TRACE_CONTROL_BIT);
+       preempt_enable_notrace();
+}
+
+static struct ftrace_ops control_ops = {
+       .func = ftrace_ops_control_func,
+};
+
+static void
 ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
 {
        struct ftrace_ops *op;
index a3f1bc5..10d5503 100644 (file)
@@ -2764,12 +2764,12 @@ static const char readme_msg[] =
        "tracing mini-HOWTO:\n\n"
        "# mount -t debugfs nodev /sys/kernel/debug\n\n"
        "# cat /sys/kernel/debug/tracing/available_tracers\n"
-       "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
+       "wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n"
        "# cat /sys/kernel/debug/tracing/current_tracer\n"
        "nop\n"
-       "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n"
+       "# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n"
        "# cat /sys/kernel/debug/tracing/current_tracer\n"
-       "sched_switch\n"
+       "wakeup\n"
        "# cat /sys/kernel/debug/tracing/trace_options\n"
        "noprint-parent nosym-offset nosym-addr noverbose\n"
        "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
index b93ecba..54faec7 100644 (file)
@@ -56,17 +56,23 @@ enum trace_type {
 #define F_STRUCT(args...)              args
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(name, struct_name, id, tstruct, print)    \
-       struct struct_name {                                    \
-               struct trace_entry      ent;                    \
-               tstruct                                         \
+#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter)    \
+       struct struct_name {                                            \
+               struct trace_entry      ent;                            \
+               tstruct                                                 \
        }
 
 #undef TP_ARGS
 #define TP_ARGS(args...)       args
 
 #undef FTRACE_ENTRY_DUP
-#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
+#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter)
+
+#undef FTRACE_ENTRY_REG
+#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print,        \
+                        filter, regfn) \
+       FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+                    filter)
 
 #include "trace_entries.h"
 
@@ -288,6 +294,8 @@ struct tracer {
 /* for function tracing recursion */
 #define TRACE_INTERNAL_BIT             (1<<11)
 #define TRACE_GLOBAL_BIT               (1<<12)
+#define TRACE_CONTROL_BIT              (1<<13)
+
 /*
  * Abuse of the trace_recursion.
  * As we need a way to maintain state if we are tracing the function
@@ -589,6 +597,8 @@ static inline int ftrace_trace_task(struct task_struct *task)
 static inline int ftrace_is_dead(void) { return 0; }
 #endif
 
+int ftrace_event_is_function(struct ftrace_event_call *call);
+
 /*
  * struct trace_parser - serves for reading the user input separated by spaces
  * @cont: set if the input is not complete - no final space char was found
@@ -766,9 +776,7 @@ struct filter_pred {
        u64                     val;
        struct regex            regex;
        unsigned short          *ops;
-#ifdef CONFIG_FTRACE_STARTUP_TEST
        struct ftrace_event_field *field;
-#endif
        int                     offset;
        int                     not;
        int                     op;
@@ -818,12 +826,22 @@ extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(call, struct_name, id, tstruct, print)            \
+#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter)    \
        extern struct ftrace_event_call                                 \
        __attribute__((__aligned__(4))) event_##call;
 #undef FTRACE_ENTRY_DUP
-#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print)                \
-       FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
+#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter)        \
+       FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+                    filter)
 #include "trace_entries.h"
 
+#ifdef CONFIG_PERF_EVENTS
+#ifdef CONFIG_FUNCTION_TRACER
+int perf_ftrace_event_register(struct ftrace_event_call *call,
+                              enum trace_reg type, void *data);
+#else
+#define perf_ftrace_event_register NULL
+#endif /* CONFIG_FUNCTION_TRACER */
+#endif /* CONFIG_PERF_EVENTS */
+
 #endif /* _LINUX_KERNEL_TRACE_H */
index 9336590..d91eb05 100644 (file)
@@ -55,7 +55,7 @@
 /*
  * Function trace entry - function address and parent function address:
  */
-FTRACE_ENTRY(function, ftrace_entry,
+FTRACE_ENTRY_REG(function, ftrace_entry,
 
        TRACE_FN,
 
@@ -64,7 +64,11 @@ FTRACE_ENTRY(function, ftrace_entry,
                __field(        unsigned long,  parent_ip       )
        ),
 
-       F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip)
+       F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip),
+
+       FILTER_TRACE_FN,
+
+       perf_ftrace_event_register
 );
 
 /* Function call entry */
@@ -78,7 +82,9 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
                __field_desc(   int,            graph_ent,      depth           )
        ),
 
-       F_printk("--> %lx (%d)", __entry->func, __entry->depth)
+       F_printk("--> %lx (%d)", __entry->func, __entry->depth),
+
+       FILTER_OTHER
 );
 
 /* Function return entry */
@@ -98,7 +104,9 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
        F_printk("<-- %lx (%d) (start: %llx  end: %llx) over: %d",
                 __entry->func, __entry->depth,
                 __entry->calltime, __entry->rettime,
-                __entry->depth)
+                __entry->depth),
+
+       FILTER_OTHER
 );
 
 /*
@@ -127,8 +135,9 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry,
        F_printk("%u:%u:%u  ==> %u:%u:%u [%03u]",
                 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
                 __entry->next_pid, __entry->next_prio, __entry->next_state,
-                __entry->next_cpu
-               )
+                __entry->next_cpu),
+
+       FILTER_OTHER
 );
 
 /*
@@ -146,8 +155,9 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
        F_printk("%u:%u:%u  ==+ %u:%u:%u [%03u]",
                 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
                 __entry->next_pid, __entry->next_prio, __entry->next_state,
-                __entry->next_cpu
-               )
+                __entry->next_cpu),
+
+       FILTER_OTHER
 );
 
 /*
@@ -169,7 +179,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
                 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
                 __entry->caller[0], __entry->caller[1], __entry->caller[2],
                 __entry->caller[3], __entry->caller[4], __entry->caller[5],
-                __entry->caller[6], __entry->caller[7])
+                __entry->caller[6], __entry->caller[7]),
+
+       FILTER_OTHER
 );
 
 FTRACE_ENTRY(user_stack, userstack_entry,
@@ -185,7 +197,9 @@ FTRACE_ENTRY(user_stack, userstack_entry,
                 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
                 __entry->caller[0], __entry->caller[1], __entry->caller[2],
                 __entry->caller[3], __entry->caller[4], __entry->caller[5],
-                __entry->caller[6], __entry->caller[7])
+                __entry->caller[6], __entry->caller[7]),
+
+       FILTER_OTHER
 );
 
 /*
@@ -202,7 +216,9 @@ FTRACE_ENTRY(bprint, bprint_entry,
        ),
 
        F_printk("%08lx fmt:%p",
-                __entry->ip, __entry->fmt)
+                __entry->ip, __entry->fmt),
+
+       FILTER_OTHER
 );
 
 FTRACE_ENTRY(print, print_entry,
@@ -215,7 +231,9 @@ FTRACE_ENTRY(print, print_entry,
        ),
 
        F_printk("%08lx %s",
-                __entry->ip, __entry->buf)
+                __entry->ip, __entry->buf),
+
+       FILTER_OTHER
 );
 
 FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
@@ -234,7 +252,9 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
 
        F_printk("%lx %lx %lx %d %x %x",
                 (unsigned long)__entry->phys, __entry->value, __entry->pc,
-                __entry->map_id, __entry->opcode, __entry->width)
+                __entry->map_id, __entry->opcode, __entry->width),
+
+       FILTER_OTHER
 );
 
 FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
@@ -252,7 +272,9 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
 
        F_printk("%lx %lx %lx %d %x",
                 (unsigned long)__entry->phys, __entry->virt, __entry->len,
-                __entry->map_id, __entry->opcode)
+                __entry->map_id, __entry->opcode),
+
+       FILTER_OTHER
 );
 
 
@@ -272,6 +294,8 @@ FTRACE_ENTRY(branch, trace_branch,
 
        F_printk("%u:%s:%s (%u)",
                 __entry->line,
-                __entry->func, __entry->file, __entry->correct)
+                __entry->func, __entry->file, __entry->correct),
+
+       FILTER_OTHER
 );
 
index 19a359d..fee3752 100644 (file)
@@ -24,6 +24,11 @@ static int   total_ref_count;
 static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
 {
+       /* The ftrace function trace is allowed only for root. */
+       if (ftrace_event_is_function(tp_event) &&
+           perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
        /* No tracing, just counting, so no obvious leak */
        if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
                return 0;
@@ -44,23 +49,17 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
        return 0;
 }
 
-static int perf_trace_event_init(struct ftrace_event_call *tp_event,
-                                struct perf_event *p_event)
+static int perf_trace_event_reg(struct ftrace_event_call *tp_event,
+                               struct perf_event *p_event)
 {
        struct hlist_head __percpu *list;
-       int ret;
+       int ret = -ENOMEM;
        int cpu;
 
-       ret = perf_trace_event_perm(tp_event, p_event);
-       if (ret)
-               return ret;
-
        p_event->tp_event = tp_event;
        if (tp_event->perf_refcount++ > 0)
                return 0;
 
-       ret = -ENOMEM;
-
        list = alloc_percpu(struct hlist_head);
        if (!list)
                goto fail;
@@ -83,7 +82,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
                }
        }
 
-       ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
+       ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
        if (ret)
                goto fail;
 
@@ -108,6 +107,69 @@ fail:
        return ret;
 }
 
+/*
+ * Drop one perf reference on the event attached to @p_event.
+ *
+ * When the event's perf_refcount reaches zero the event class is sent
+ * TRACE_REG_PERF_UNREGISTER and the per-cpu perf_events list is freed;
+ * when total_ref_count reaches zero as well, the per-context
+ * perf_trace_buf buffers are freed too. The module reference is put on
+ * every path.
+ */
+static void perf_trace_event_unreg(struct perf_event *p_event)
+{
+       struct ftrace_event_call *call = p_event->tp_event;
+       int ctx;
+
+       if (--call->perf_refcount <= 0) {
+               call->class->reg(call, TRACE_REG_PERF_UNREGISTER, NULL);
+
+               /*
+                * Ensure our callback won't be called anymore. The buffers
+                * will be freed after that.
+                */
+               tracepoint_synchronize_unregister();
+
+               free_percpu(call->perf_events);
+               call->perf_events = NULL;
+
+               total_ref_count--;
+               if (total_ref_count == 0) {
+                       for (ctx = 0; ctx < PERF_NR_CONTEXTS; ctx++) {
+                               free_percpu(perf_trace_buf[ctx]);
+                               perf_trace_buf[ctx] = NULL;
+                       }
+               }
+       }
+
+       module_put(call->mod);
+}
+
+/* Send TRACE_REG_PERF_OPEN for @p_event to its event class; returns the class' result. */
+static int perf_trace_event_open(struct perf_event *p_event)
+{
+       struct ftrace_event_call *call = p_event->tp_event;
+       int err;
+
+       err = call->class->reg(call, TRACE_REG_PERF_OPEN, p_event);
+       return err;
+}
+
+/* Send TRACE_REG_PERF_CLOSE for @p_event to its event class; the result is ignored. */
+static void perf_trace_event_close(struct perf_event *p_event)
+{
+       struct ftrace_event_call *call = p_event->tp_event;
+
+       call->class->reg(call, TRACE_REG_PERF_CLOSE, p_event);
+}
+
+/*
+ * Set up @p_event on @tp_event in three steps: permission check,
+ * registration, then the per-event open. A failing open undoes the
+ * registration. Returns 0 on success or the error of the failing step.
+ */
+static int perf_trace_event_init(struct ftrace_event_call *tp_event,
+                                struct perf_event *p_event)
+{
+       int err = perf_trace_event_perm(tp_event, p_event);
+
+       if (!err)
+               err = perf_trace_event_reg(tp_event, p_event);
+       if (err)
+               return err;
+
+       err = perf_trace_event_open(p_event);
+       if (err)
+               perf_trace_event_unreg(p_event);
+
+       return err;
+}
+
 int perf_trace_init(struct perf_event *p_event)
 {
        struct ftrace_event_call *tp_event;
@@ -130,6 +192,14 @@ int perf_trace_init(struct perf_event *p_event)
        return ret;
 }
 
+/*
+ * Tear down the tracing side of @p_event: with event_mutex held, send the
+ * close request to the event class first and then drop the registration
+ * reference.
+ */
+void perf_trace_destroy(struct perf_event *p_event)
+{
+       mutex_lock(&event_mutex);
+       perf_trace_event_close(p_event);
+       perf_trace_event_unreg(p_event);
+       mutex_unlock(&event_mutex);
+}
+
 int perf_trace_add(struct perf_event *p_event, int flags)
 {
        struct ftrace_event_call *tp_event = p_event->tp_event;
@@ -146,43 +216,14 @@ int perf_trace_add(struct perf_event *p_event, int flags)
        list = this_cpu_ptr(pcpu_list);
        hlist_add_head_rcu(&p_event->hlist_entry, list);
 
-       return 0;
+       return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
 }
 
 void perf_trace_del(struct perf_event *p_event, int flags)
 {
-       hlist_del_rcu(&p_event->hlist_entry);
-}
-
-void perf_trace_destroy(struct perf_event *p_event)
-{
        struct ftrace_event_call *tp_event = p_event->tp_event;
-       int i;
-
-       mutex_lock(&event_mutex);
-       if (--tp_event->perf_refcount > 0)
-               goto out;
-
-       tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
-
-       /*
-        * Ensure our callback won't be called anymore. The buffers
-        * will be freed after that.
-        */
-       tracepoint_synchronize_unregister();
-
-       free_percpu(tp_event->perf_events);
-       tp_event->perf_events = NULL;
-
-       if (!--total_ref_count) {
-               for (i = 0; i < PERF_NR_CONTEXTS; i++) {
-                       free_percpu(perf_trace_buf[i]);
-                       perf_trace_buf[i] = NULL;
-               }
-       }
-out:
-       module_put(tp_event->mod);
-       mutex_unlock(&event_mutex);
+       hlist_del_rcu(&p_event->hlist_entry);
+       tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
 }
 
 __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
@@ -214,3 +255,86 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
        return raw_data;
 }
 EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
+
+#ifdef CONFIG_FUNCTION_TRACER
+/*
+ * Function-trace callback installed by perf_ftrace_function_register().
+ * Records @ip and @parent_ip in a struct ftrace_entry taken from the perf
+ * trace buffer and submits it to this CPU's event_function.perf_events
+ * list. Returns silently when no buffer can be prepared.
+ */
+static void
+perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
+{
+       struct ftrace_entry *entry;
+       struct hlist_head *head;
+       struct pt_regs regs;
+       int rctx;
+
+/*
+ * sizeof(struct ftrace_entry) plus a u32, rounded up to a multiple of
+ * sizeof(u64), minus that u32 again.
+ * NOTE(review): the u32 presumably accounts for a size header perf puts
+ * in front of the raw data - confirm against perf_trace_buf_prepare().
+ */
+#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
+                   sizeof(u64)) - sizeof(u32))
+
+       /* Compile-time check that the record fits the perf trace buffer. */
+       BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);
+
+       perf_fetch_caller_regs(&regs);
+
+       entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx);
+       if (!entry)
+               return;
+
+       entry->ip = ip;
+       entry->parent_ip = parent_ip;
+
+       head = this_cpu_ptr(event_function.perf_events);
+       perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
+                             1, &regs, head);
+
+#undef ENTRY_SIZE
+}
+
+/*
+ * Prepare and register the ftrace_ops embedded in @event: mark it with
+ * FTRACE_OPS_FL_CONTROL, install perf_ftrace_function_call() as its
+ * callback and hand it to register_ftrace_function(), whose result is
+ * returned.
+ */
+static int perf_ftrace_function_register(struct perf_event *event)
+{
+       struct ftrace_ops *ops = &event->ftrace_ops;
+
+       ops->flags |= FTRACE_OPS_FL_CONTROL;
+       ops->func = perf_ftrace_function_call;
+       return register_ftrace_function(ops);
+}
+
+/*
+ * Unregister the ftrace_ops embedded in @event and release its filter.
+ * The filter is freed whatever the unregistration reports; the
+ * unregistration result is what gets returned.
+ */
+static int perf_ftrace_function_unregister(struct perf_event *event)
+{
+       struct ftrace_ops *fops = &event->ftrace_ops;
+       int err;
+
+       err = unregister_ftrace_function(fops);
+       ftrace_free_filter(fops);
+
+       return err;
+}
+
+/* TRACE_REG_PERF_ADD handler: locally enable the ftrace_ops of @event. */
+static void perf_ftrace_function_enable(struct perf_event *event)
+{
+       ftrace_function_local_enable(&event->ftrace_ops);
+}
+
+/* TRACE_REG_PERF_DEL handler: locally disable the ftrace_ops of @event. */
+static void perf_ftrace_function_disable(struct perf_event *event)
+{
+       ftrace_function_local_disable(&event->ftrace_ops);
+}
+
+/*
+ * Register callback (class->reg) of the ftrace "function" event.
+ *
+ * @call: the event the request is for (not used here)
+ * @type: the request
+ * @data: the struct perf_event for the OPEN/CLOSE/ADD/DEL requests
+ *
+ * Only the perf requests are served. TRACE_REG_REGISTER and
+ * TRACE_REG_UNREGISTER, like any unknown request, yield -EINVAL.
+ */
+int perf_ftrace_event_register(struct ftrace_event_call *call,
+                              enum trace_reg type, void *data)
+{
+       struct perf_event *event = data;
+
+       switch (type) {
+       case TRACE_REG_PERF_OPEN:
+               return perf_ftrace_function_register(event);
+       case TRACE_REG_PERF_CLOSE:
+               return perf_ftrace_function_unregister(event);
+       case TRACE_REG_PERF_ADD:
+               perf_ftrace_function_enable(event);
+               return 0;
+       case TRACE_REG_PERF_DEL:
+               perf_ftrace_function_disable(event);
+               return 0;
+       case TRACE_REG_PERF_REGISTER:
+       case TRACE_REG_PERF_UNREGISTER:
+               /* Nothing to do per event class; the work happens on open/close. */
+               return 0;
+       case TRACE_REG_REGISTER:
+       case TRACE_REG_UNREGISTER:
+               break;
+       }
+
+       return -EINVAL;
+}
+#endif /* CONFIG_FUNCTION_TRACER */
index c212a7f..079a93a 100644 (file)
@@ -147,7 +147,8 @@ int trace_event_raw_init(struct ftrace_event_call *call)
 }
 EXPORT_SYMBOL_GPL(trace_event_raw_init);
 
-int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
+int ftrace_event_reg(struct ftrace_event_call *call,
+                    enum trace_reg type, void *data)
 {
        switch (type) {
        case TRACE_REG_REGISTER:
@@ -170,6 +171,11 @@ int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
                                            call->class->perf_probe,
                                            call);
                return 0;
+       case TRACE_REG_PERF_OPEN:
+       case TRACE_REG_PERF_CLOSE:
+       case TRACE_REG_PERF_ADD:
+       case TRACE_REG_PERF_DEL:
+               return 0;
 #endif
        }
        return 0;
@@ -209,7 +215,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
                                tracing_stop_cmdline_record();
                                call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
                        }
-                       call->class->reg(call, TRACE_REG_UNREGISTER);
+                       call->class->reg(call, TRACE_REG_UNREGISTER, NULL);
                }
                break;
        case 1:
@@ -218,7 +224,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
                                tracing_start_cmdline_record();
                                call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
                        }
-                       ret = call->class->reg(call, TRACE_REG_REGISTER);
+                       ret = call->class->reg(call, TRACE_REG_REGISTER, NULL);
                        if (ret) {
                                tracing_stop_cmdline_record();
                                pr_info("event trace: Could not enable event "
index 24aee71..431dba8 100644 (file)
@@ -81,6 +81,7 @@ enum {
        FILT_ERR_TOO_MANY_PREDS,
        FILT_ERR_MISSING_FIELD,
        FILT_ERR_INVALID_FILTER,
+       FILT_ERR_IP_FIELD_ONLY,
 };
 
 static char *err_text[] = {
@@ -96,6 +97,7 @@ static char *err_text[] = {
        "Too many terms in predicate expression",
        "Missing field name and/or value",
        "Meaningless filter expression",
+       "Only 'ip' field is supported for function trace",
 };
 
 struct opstack_op {
@@ -685,7 +687,7 @@ find_event_field(struct ftrace_event_call *call, char *name)
 
 static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
 {
-       stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
+       stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL);
        if (!stack->preds)
                return -ENOMEM;
        stack->index = n_preds;
@@ -826,8 +828,7 @@ static int __alloc_preds(struct event_filter *filter, int n_preds)
        if (filter->preds)
                __free_preds(filter);
 
-       filter->preds =
-               kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL);
+       filter->preds = kcalloc(n_preds, sizeof(*filter->preds), GFP_KERNEL);
 
        if (!filter->preds)
                return -ENOMEM;
@@ -900,6 +901,11 @@ int filter_assign_type(const char *type)
        return FILTER_OTHER;
 }
 
+/* True when @field was defined with the FILTER_TRACE_FN filter type. */
+static bool is_function_field(struct ftrace_event_field *field)
+{
+       return field->filter_type == FILTER_TRACE_FN;
+}
+
 static bool is_string_field(struct ftrace_event_field *field)
 {
        return field->filter_type == FILTER_DYN_STRING ||
@@ -987,6 +993,11 @@ static int init_pred(struct filter_parse_state *ps,
                        fn = filter_pred_strloc;
                else
                        fn = filter_pred_pchar;
+       } else if (is_function_field(field)) {
+               if (strcmp(field->name, "ip")) {
+                       parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0);
+                       return -EINVAL;
+               }
        } else {
                if (field->is_signed)
                        ret = strict_strtoll(pred->regex.pattern, 0, &val);
@@ -1334,10 +1345,7 @@ static struct filter_pred *create_pred(struct filter_parse_state *ps,
 
        strcpy(pred.regex.pattern, operand2);
        pred.regex.len = strlen(pred.regex.pattern);
-
-#ifdef CONFIG_FTRACE_STARTUP_TEST
        pred.field = field;
-#endif
        return init_pred(ps, field, &pred) ? NULL : &pred;
 }
 
@@ -1486,7 +1494,7 @@ static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
        children = count_leafs(preds, &preds[root->left]);
        children += count_leafs(preds, &preds[root->right]);
 
-       root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL);
+       root->ops = kcalloc(children, sizeof(*root->ops), GFP_KERNEL);
        if (!root->ops)
                return -ENOMEM;
 
@@ -1950,6 +1958,148 @@ void ftrace_profile_free_filter(struct perf_event *event)
        __free_filter(filter);
 }
 
+/*
+ * State carried through the predicate tree walk that turns an event filter
+ * into ftrace filter/notrace settings.
+ */
+struct function_filter_data {
+       /* ftrace_ops the filter and notrace patterns are applied to */
+       struct ftrace_ops *ops;
+       /* passed as 'reset' to ftrace_set_filter(); cleared after the first pattern */
+       int first_filter;
+       /* passed as 'reset' to ftrace_set_notrace(); cleared after the first pattern */
+       int first_notrace;
+};
+
+#ifdef CONFIG_FUNCTION_TRACER
+/*
+ * Copy at most @len bytes of @buf, turn every ',' of the copy into a
+ * space and split it with argv_split(), which also fills in *count.
+ * Returns what argv_split() returns, or NULL when the temporary copy
+ * cannot be allocated.
+ */
+static char **
+ftrace_function_filter_re(char *buf, int len, int *count)
+{
+       char *copy, *p, **argv;
+
+       copy = kstrndup(buf, len, GFP_KERNEL);
+       if (!copy)
+               return NULL;
+
+       /* argv_split() separates on white space only, so map ',' to ' '. */
+       for (p = copy; *p; p++) {
+               if (*p == ',')
+                       *p = ' ';
+       }
+
+       argv = argv_split(GFP_KERNEL, copy, count);
+       kfree(copy);
+
+       return argv;
+}
+
+/*
+ * Apply the single pattern @re (@len bytes) to @ops: through
+ * ftrace_set_filter() when @filter is non-zero, through
+ * ftrace_set_notrace() otherwise. @reset is handed on unchanged.
+ */
+static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter,
+                                     int reset, char *re, int len)
+{
+       if (!filter)
+               return ftrace_set_notrace(ops, re, len, reset);
+
+       return ftrace_set_filter(ops, re, len, reset);
+}
+
+/*
+ * Apply the pattern list in @buf (@len bytes) to data->ops, as a filter
+ * when @filter is non-zero and as a notrace list otherwise.
+ *
+ * Returns 0 on success, the error of the first pattern that could not be
+ * set, or -EINVAL when the list cannot be split or holds no pattern.
+ */
+static int __ftrace_function_set_filter(int filter, char *buf, int len,
+                                       struct function_filter_data *data)
+{
+       /*
+        * ret must start out defined: with an empty pattern list
+        * (re_cnt == 0) the loop below never assigns it.
+        */
+       int i, re_cnt, ret = -EINVAL;
+       int *reset;
+       char **re;
+
+       reset = filter ? &data->first_filter : &data->first_notrace;
+
+       /*
+        * The 'ip' field could have multiple filters set, separated
+        * either by space or comma. We first cut the filter and apply
+        * all pieces separately.
+        */
+       re = ftrace_function_filter_re(buf, len, &re_cnt);
+       if (!re)
+               return -EINVAL;
+
+       for (i = 0; i < re_cnt; i++) {
+               ret = ftrace_function_set_regexp(data->ops, filter, *reset,
+                                                re[i], strlen(re[i]));
+               if (ret)
+                       break;
+
+               /* Only the first pattern of a kind resets the existing list. */
+               if (*reset)
+                       *reset = 0;
+       }
+
+       argv_free(re);
+       return ret;
+}
+
+/*
+ * Validate one predicate of a function-trace filter.
+ *
+ * A non-leaf predicate (@leaf == 0) must be an '||'. A leaf predicate
+ * must use '==' or '!=' and must refer to the 'ip' field.
+ *
+ * Returns 0 when the predicate is acceptable, -EINVAL otherwise.
+ */
+static int ftrace_function_check_pred(struct filter_pred *pred, int leaf)
+{
+       if (!leaf)
+               return (pred->op == OP_OR) ? 0 : -EINVAL;
+
+       if (pred->op != OP_EQ && pred->op != OP_NE)
+               return -EINVAL;
+
+       if (strcmp(pred->field->name, "ip") != 0)
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * walk_pred_tree() callback: validate every visited predicate for function
+ * trace and, for leaves, apply the pattern to the ftrace_ops in @data.
+ *
+ * A predicate is handled as a leaf when it is reached with MOVE_DOWN and
+ * has no left child. '==' leaves are set as filter, '!=' leaves as
+ * notrace. Any error is stored in *err and aborts the walk.
+ */
+static int ftrace_function_set_filter_cb(enum move_type move,
+                                        struct filter_pred *pred,
+                                        int *err, void *data)
+{
+       int leaf = (move == MOVE_DOWN) &&
+                  (pred->left == FILTER_PRED_INVALID);
+
+       *err = ftrace_function_check_pred(pred, leaf);
+       if (*err)
+               return WALK_PRED_ABORT;
+
+       if (leaf) {
+               *err = __ftrace_function_set_filter(pred->op == OP_EQ,
+                                                   pred->regex.pattern,
+                                                   pred->regex.len,
+                                                   data);
+               if (*err)
+                       return WALK_PRED_ABORT;
+       }
+
+       return WALK_PRED_DEFAULT;
+}
+
+/*
+ * Translate @filter into filter/notrace settings on the ftrace_ops of
+ * @event by walking its predicate tree. Both 'first' flags start set.
+ * Returns the result of walk_pred_tree().
+ */
+static int ftrace_function_set_filter(struct perf_event *event,
+                                     struct event_filter *filter)
+{
+       struct function_filter_data ffd = {
+               .ops           = &event->ftrace_ops,
+               .first_filter  = 1,
+               .first_notrace = 1,
+       };
+
+       return walk_pred_tree(filter->preds, filter->root,
+                             ftrace_function_set_filter_cb, &ffd);
+}
+#else
+/* Without CONFIG_FUNCTION_TRACER a function filter can never be set. */
+static int ftrace_function_set_filter(struct perf_event *event,
+                                     struct event_filter *filter)
+{
+       return -ENODEV;
+}
+#endif /* CONFIG_FUNCTION_TRACER */
+
 int ftrace_profile_set_filter(struct perf_event *event, int event_id,
                              char *filter_str)
 {
@@ -1970,9 +2120,16 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
                goto out_unlock;
 
        err = create_filter(call, filter_str, false, &filter);
-       if (!err)
-               event->filter = filter;
+       if (err)
+               goto free_filter;
+
+       if (ftrace_event_is_function(call))
+               err = ftrace_function_set_filter(event, filter);
        else
+               event->filter = filter;
+
+free_filter:
+       if (err || ftrace_event_is_function(call))
                __free_filter(filter);
 
 out_unlock:
index bbeec31..7b46c9b 100644 (file)
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM   ftrace
 
+/*
+ * The FTRACE_ENTRY_REG macro allows an ftrace entry to define a register
+ * function and thus become accessible via perf.
+ */
+#undef FTRACE_ENTRY_REG
+#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \
+                        filter, regfn) \
+       FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+                    filter)
+
 /* not needed for this file */
 #undef __field_struct
 #define __field_struct(type, item)
 #define F_printk(fmt, args...) fmt, args
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(name, struct_name, id, tstruct, print)    \
-struct ____ftrace_##name {                                     \
-       tstruct                                                 \
-};                                                             \
-static void __always_unused ____ftrace_check_##name(void)      \
-{                                                              \
-       struct ____ftrace_##name *__entry = NULL;               \
-                                                               \
-       /* force compile-time check on F_printk() */            \
-       printk(print);                                          \
+#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter)    \
+struct ____ftrace_##name {                                             \
+       tstruct                                                         \
+};                                                                     \
+static void __always_unused ____ftrace_check_##name(void)              \
+{                                                                      \
+       struct ____ftrace_##name *__entry = NULL;                       \
+                                                                       \
+       /* force compile-time check on F_printk() */                    \
+       printk(print);                                                  \
 }
 
 #undef FTRACE_ENTRY_DUP
-#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print)        \
-       FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
+#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter)        \
+       FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+                    filter)
 
 #include "trace_entries.h"
 
@@ -67,7 +78,7 @@ static void __always_unused ____ftrace_check_##name(void)     \
        ret = trace_define_field(event_call, #type, #item,              \
                                 offsetof(typeof(field), item),         \
                                 sizeof(field.item),                    \
-                                is_signed_type(type), FILTER_OTHER);   \
+                                is_signed_type(type), filter_type);    \
        if (ret)                                                        \
                return ret;
 
@@ -77,7 +88,7 @@ static void __always_unused ____ftrace_check_##name(void)     \
                                 offsetof(typeof(field),                \
                                          container.item),              \
                                 sizeof(field.container.item),          \
-                                is_signed_type(type), FILTER_OTHER);   \
+                                is_signed_type(type), filter_type);    \
        if (ret)                                                        \
                return ret;
 
@@ -91,7 +102,7 @@ static void __always_unused ____ftrace_check_##name(void)    \
                ret = trace_define_field(event_call, event_storage, #item, \
                                 offsetof(typeof(field), item),         \
                                 sizeof(field.item),                    \
-                                is_signed_type(type), FILTER_OTHER);   \
+                                is_signed_type(type), filter_type);    \
                mutex_unlock(&event_storage_mutex);                     \
                if (ret)                                                \
                        return ret;                                     \
@@ -104,7 +115,7 @@ static void __always_unused ____ftrace_check_##name(void)   \
                                 offsetof(typeof(field),                \
                                          container.item),              \
                                 sizeof(field.container.item),          \
-                                is_signed_type(type), FILTER_OTHER);   \
+                                is_signed_type(type), filter_type);    \
        if (ret)                                                        \
                return ret;
 
@@ -112,17 +123,18 @@ static void __always_unused ____ftrace_check_##name(void) \
 #define __dynamic_array(type, item)                                    \
        ret = trace_define_field(event_call, #type, #item,              \
                                 offsetof(typeof(field), item),         \
-                                0, is_signed_type(type), FILTER_OTHER);\
+                                0, is_signed_type(type), filter_type);\
        if (ret)                                                        \
                return ret;
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(name, struct_name, id, tstruct, print)            \
+#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter)    \
 int                                                                    \
 ftrace_define_fields_##name(struct ftrace_event_call *event_call)      \
 {                                                                      \
        struct struct_name field;                                       \
        int ret;                                                        \
+       int filter_type = filter;                                       \
                                                                        \
        tstruct;                                                        \
                                                                        \
@@ -152,13 +164,15 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
 #undef F_printk
 #define F_printk(fmt, args...) #fmt ", "  __stringify(args)
 
-#undef FTRACE_ENTRY
-#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print)         \
+#undef FTRACE_ENTRY_REG
+#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
+                        regfn)                                         \
                                                                        \
 struct ftrace_event_class event_class_ftrace_##call = {                        \
        .system                 = __stringify(TRACE_SYSTEM),            \
        .define_fields          = ftrace_define_fields_##call,          \
        .fields                 = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
+       .reg                    = regfn,                                \
 };                                                                     \
                                                                        \
 struct ftrace_event_call __used event_##call = {                       \
@@ -170,4 +184,14 @@ struct ftrace_event_call __used event_##call = {                   \
 struct ftrace_event_call __used                                                \
 __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
 
+#undef FTRACE_ENTRY
+#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter) \
+       FTRACE_ENTRY_REG(call, struct_name, etype,                      \
+                        PARAMS(tstruct), PARAMS(print), filter, NULL)
+
+/* Non-zero when @call is event_function, the ftrace "function" event. */
+int ftrace_event_is_function(struct ftrace_event_call *call)
+{
+       return call == &event_function;
+}
+
 #include "trace_entries.h"
index 00d527c..580a05e 100644 (file)
@@ -1892,7 +1892,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
 #endif /* CONFIG_PERF_EVENTS */
 
 static __kprobes
-int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
+int kprobe_register(struct ftrace_event_call *event,
+                   enum trace_reg type, void *data)
 {
        struct trace_probe *tp = (struct trace_probe *)event->data;
 
@@ -1909,6 +1910,11 @@ int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
        case TRACE_REG_PERF_UNREGISTER:
                disable_trace_probe(tp, TP_FLAG_PROFILE);
                return 0;
+       case TRACE_REG_PERF_OPEN:
+       case TRACE_REG_PERF_CLOSE:
+       case TRACE_REG_PERF_ADD:
+       case TRACE_REG_PERF_DEL:
+               return 0;
 #endif
        }
        return 0;
index 0d6ff35..c5a0187 100644 (file)
@@ -300,7 +300,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
        unsigned long mask;
        const char *str;
        const char *ret = p->buffer + p->len;
-       int i;
+       int i, first = 1;
 
        for (i = 0;  flag_array[i].name && flags; i++) {
 
@@ -310,14 +310,16 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 
                str = flag_array[i].name;
                flags &= ~mask;
-               if (p->len && delim)
+               if (!first && delim)
                        trace_seq_puts(p, delim);
+               else
+                       first = 0;
                trace_seq_puts(p, str);
        }
 
        /* check for left over flags */
        if (flags) {
-               if (p->len && delim)
+               if (!first && delim)
                        trace_seq_puts(p, delim);
                trace_seq_printf(p, "0x%lx", flags);
        }
@@ -344,7 +346,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
                break;
        }
 
-       if (!p->len)
+       if (ret == (const char *)(p->buffer + p->len))
                trace_seq_printf(p, "0x%lx", val);
                
        trace_seq_putc(p, 0);
@@ -370,7 +372,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
                break;
        }
 
-       if (!p->len)
+       if (ret == (const char *)(p->buffer + p->len))
                trace_seq_printf(p, "0x%llx", val);
 
        trace_seq_putc(p, 0);
index cb65454..96fc733 100644 (file)
@@ -17,9 +17,9 @@ static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
 
 static int syscall_enter_register(struct ftrace_event_call *event,
-                                enum trace_reg type);
+                                enum trace_reg type, void *data);
 static int syscall_exit_register(struct ftrace_event_call *event,
-                                enum trace_reg type);
+                                enum trace_reg type, void *data);
 
 static int syscall_enter_define_fields(struct ftrace_event_call *call);
 static int syscall_exit_define_fields(struct ftrace_event_call *call);
@@ -468,8 +468,8 @@ int __init init_ftrace_syscalls(void)
        unsigned long addr;
        int i;
 
-       syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
-                                       NR_syscalls, GFP_KERNEL);
+       syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
+                                   GFP_KERNEL);
        if (!syscalls_metadata) {
                WARN_ON(1);
                return -ENOMEM;
@@ -649,7 +649,7 @@ void perf_sysexit_disable(struct ftrace_event_call *call)
 #endif /* CONFIG_PERF_EVENTS */
 
 static int syscall_enter_register(struct ftrace_event_call *event,
-                                enum trace_reg type)
+                                enum trace_reg type, void *data)
 {
        switch (type) {
        case TRACE_REG_REGISTER:
@@ -664,13 +664,18 @@ static int syscall_enter_register(struct ftrace_event_call *event,
        case TRACE_REG_PERF_UNREGISTER:
                perf_sysenter_disable(event);
                return 0;
+       case TRACE_REG_PERF_OPEN:
+       case TRACE_REG_PERF_CLOSE:
+       case TRACE_REG_PERF_ADD:
+       case TRACE_REG_PERF_DEL:
+               return 0;
 #endif
        }
        return 0;
 }
 
 static int syscall_exit_register(struct ftrace_event_call *event,
-                                enum trace_reg type)
+                                enum trace_reg type, void *data)
 {
        switch (type) {
        case TRACE_REG_REGISTER:
@@ -685,6 +690,11 @@ static int syscall_exit_register(struct ftrace_event_call *event,
        case TRACE_REG_PERF_UNREGISTER:
                perf_sysexit_disable(event);
                return 0;
+       case TRACE_REG_PERF_OPEN:
+       case TRACE_REG_PERF_CLOSE:
+       case TRACE_REG_PERF_ADD:
+       case TRACE_REG_PERF_DEL:
+               return 0;
 #endif
        }
        return 0;
index f1539de..d96ba22 100644 (file)
@@ -25,7 +25,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
-#include <linux/jump_label.h>
+#include <linux/static_key.h>
 
 extern struct tracepoint * const __start___tracepoints_ptrs[];
 extern struct tracepoint * const __stop___tracepoints_ptrs[];
@@ -256,9 +256,9 @@ static void set_tracepoint(struct tracepoint_entry **entry,
 {
        WARN_ON(strcmp((*entry)->name, elem->name) != 0);
 
-       if (elem->regfunc && !jump_label_enabled(&elem->key) && active)
+       if (elem->regfunc && !static_key_enabled(&elem->key) && active)
                elem->regfunc();
-       else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active)
+       else if (elem->unregfunc && static_key_enabled(&elem->key) && !active)
                elem->unregfunc();
 
        /*
@@ -269,10 +269,10 @@ static void set_tracepoint(struct tracepoint_entry **entry,
         * is used.
         */
        rcu_assign_pointer(elem->funcs, (*entry)->funcs);
-       if (active && !jump_label_enabled(&elem->key))
-               jump_label_inc(&elem->key);
-       else if (!active && jump_label_enabled(&elem->key))
-               jump_label_dec(&elem->key);
+       if (active && !static_key_enabled(&elem->key))
+               static_key_slow_inc(&elem->key);
+       else if (!active && static_key_enabled(&elem->key))
+               static_key_slow_dec(&elem->key);
 }
 
 /*
@@ -283,11 +283,11 @@ static void set_tracepoint(struct tracepoint_entry **entry,
  */
 static void disable_tracepoint(struct tracepoint *elem)
 {
-       if (elem->unregfunc && jump_label_enabled(&elem->key))
+       if (elem->unregfunc && static_key_enabled(&elem->key))
                elem->unregfunc();
 
-       if (jump_label_enabled(&elem->key))
-               jump_label_dec(&elem->key);
+       if (static_key_enabled(&elem->key))
+               static_key_slow_dec(&elem->key);
        rcu_assign_pointer(elem->funcs, NULL);
 }
 
index d117262..14bc092 100644 (file)
@@ -3,12 +3,9 @@
  *
  * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
  *
- * this code detects hard lockups: incidents in where on a CPU
- * the kernel does not respond to anything except NMI.
- *
- * Note: Most of this code is borrowed heavily from softlockup.c,
- * so thanks to Ingo for the initial implementation.
- * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
+ * Note: Most of this code is borrowed heavily from the original softlockup
+ * detector, so thanks to Ingo for the initial implementation.
+ * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
  * to those contributors as well.
  */
 
@@ -117,9 +114,10 @@ static unsigned long get_sample_period(void)
 {
        /*
         * convert watchdog_thresh from seconds to ns
-        * the divide by 5 is to give hrtimer 5 chances to
-        * increment before the hardlockup detector generates
-        * a warning
+        * the divide by 5 is to give hrtimer several chances (two
+        * or three with the current relation between the soft
+        * and hard thresholds) to increment before the
+        * hardlockup detector generates a warning
         */
        return get_softlockup_thresh() * (NSEC_PER_SEC / 5);
 }
@@ -336,9 +334,11 @@ static int watchdog(void *unused)
 
        set_current_state(TASK_INTERRUPTIBLE);
        /*
-        * Run briefly once per second to reset the softlockup timestamp.
-        * If this gets delayed for more than 60 seconds then the
-        * debug-printout triggers in watchdog_timer_fn().
+        * Run briefly (kicked by the hrtimer callback function) once every
+        * get_sample_period() nanoseconds (4 seconds by default) to reset the
+        * softlockup timestamp. If this gets delayed for more than
+        * 2*watchdog_thresh seconds then the debug-printout triggers in
+        * watchdog_timer_fn().
         */
        while (!kthread_should_stop()) {
                __touch_watchdog();
index bec7b5b..f2c5638 100644 (file)
@@ -253,11 +253,13 @@ struct workqueue_struct *system_long_wq __read_mostly;
 struct workqueue_struct *system_nrt_wq __read_mostly;
 struct workqueue_struct *system_unbound_wq __read_mostly;
 struct workqueue_struct *system_freezable_wq __read_mostly;
+struct workqueue_struct *system_nrt_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_wq);
 EXPORT_SYMBOL_GPL(system_long_wq);
 EXPORT_SYMBOL_GPL(system_nrt_wq);
 EXPORT_SYMBOL_GPL(system_unbound_wq);
 EXPORT_SYMBOL_GPL(system_freezable_wq);
+EXPORT_SYMBOL_GPL(system_nrt_freezable_wq);
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
@@ -3833,8 +3835,11 @@ static int __init init_workqueues(void)
                                            WQ_UNBOUND_MAX_ACTIVE);
        system_freezable_wq = alloc_workqueue("events_freezable",
                                              WQ_FREEZABLE, 0);
+       system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable",
+                       WQ_NON_REENTRANT | WQ_FREEZABLE, 0);
        BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq ||
-              !system_unbound_wq || !system_freezable_wq);
+              !system_unbound_wq || !system_freezable_wq ||
+               !system_nrt_freezable_wq);
        return 0;
 }
 early_initcall(init_workqueues);
index 8745ac7..05037dc 100644 (file)
@@ -166,18 +166,21 @@ config LOCKUP_DETECTOR
          hard and soft lockups.
 
          Softlockups are bugs that cause the kernel to loop in kernel
-         mode for more than 60 seconds, without giving other tasks a
+         mode for more than 20 seconds, without giving other tasks a
          chance to run.  The current stack trace is displayed upon
          detection and the system will stay locked up.
 
          Hardlockups are bugs that cause the CPU to loop in kernel mode
-         for more than 60 seconds, without letting other interrupts have a
+         for more than 10 seconds, without letting other interrupts have a
          chance to run.  The current stack trace is displayed upon detection
          and the system will stay locked up.
 
          The overhead should be minimal.  A periodic hrtimer runs to
-         generate interrupts and kick the watchdog task every 10-12 seconds.
-         An NMI is generated every 60 seconds or so to check for hardlockups.
+         generate interrupts and kick the watchdog task every 4 seconds.
+         An NMI is generated every 10 seconds or so to check for hardlockups.
+
+         The frequency of hrtimer and NMI events and the soft and hard lockup
+         thresholds can be controlled through the sysctl watchdog_thresh.
 
 config HARDLOCKUP_DETECTOR
        def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
@@ -189,7 +192,8 @@ config BOOTPARAM_HARDLOCKUP_PANIC
        help
          Say Y here to enable the kernel to panic on "hard lockups",
          which are bugs that cause the kernel to loop in kernel
-         mode with interrupts disabled for more than 60 seconds.
+         mode with interrupts disabled for more than 10 seconds (configurable
+         using the watchdog_thresh sysctl).
 
          Say N if unsure.
 
@@ -206,8 +210,8 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
        help
          Say Y here to enable the kernel to panic on "soft lockups",
          which are bugs that cause the kernel to loop in kernel
-         mode for more than 60 seconds, without giving other tasks a
-         chance to run.
+         mode for more than 20 seconds (configurable using the watchdog_thresh
+         sysctl), without giving other tasks a chance to run.
 
          The panic can be used in combination with panic_timeout,
          to cause the system to reboot automatically after a
@@ -927,6 +931,30 @@ config RCU_CPU_STALL_VERBOSE
 
          Say Y if you want to enable such checks.
 
+config RCU_CPU_STALL_INFO
+       bool "Print additional diagnostics on RCU CPU stall"
+       depends on (TREE_RCU || TREE_PREEMPT_RCU) && DEBUG_KERNEL
+       default n
+       help
+         For each stalled CPU that is aware of the current RCU grace
+         period, print out additional per-CPU diagnostic information
+         regarding scheduling-clock ticks, idle state, and,
+         for RCU_FAST_NO_HZ kernels, idle-entry state.
+
+         Say N if you are unsure.
+
+         Say Y if you want to enable such diagnostics.
+
+config RCU_TRACE
+       bool "Enable tracing for RCU"
+       depends on DEBUG_KERNEL
+       help
+         This option provides tracing in RCU which presents stats
+         in debugfs for debugging RCU implementation.
+
+         Say Y here if you want to enable RCU tracing.
+         Say N if you are unsure.
+
 config KPROBES_SANITY_TEST
        bool "Kprobes sanity tests"
        depends on DEBUG_KERNEL
index 77cb245..0ab9ae8 100644 (file)
@@ -818,17 +818,9 @@ static int __init fixup_activate(void *addr, enum debug_obj_state state)
                if (obj->static_init == 1) {
                        debug_object_init(obj, &descr_type_test);
                        debug_object_activate(obj, &descr_type_test);
-                       /*
-                        * Real code should return 0 here ! This is
-                        * not a fixup of some bad behaviour. We
-                        * merily call the debug_init function to keep
-                        * track of the object.
-                        */
-                       return 1;
-               } else {
-                       /* Real code needs to emit a warning here */
+                       return 0;
                }
-               return 0;
+               return 1;
 
        case ODEBUG_STATE_ACTIVE:
                debug_object_deactivate(obj, &descr_type_test);
@@ -967,7 +959,7 @@ static void __init debug_objects_selftest(void)
 
        obj.static_init = 1;
        debug_object_activate(&obj, &descr_type_test);
-       if (check_results(&obj, ODEBUG_STATE_ACTIVE, ++fixups, warnings))
+       if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
                goto out;
        debug_object_init(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
index 3d1bdcd..6ab4587 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/types.h>
 #include <linux/ctype.h>
 #include <linux/kernel.h>
+#include <linux/jiffies.h>
 #include <linux/dynamic_queue_limits.h>
 
 #define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0)
index 8e75003..38e612e 100644 (file)
@@ -891,9 +891,15 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
        case 'U':
                return uuid_string(buf, end, ptr, spec, fmt);
        case 'V':
-               return buf + vsnprintf(buf, end > buf ? end - buf : 0,
-                                      ((struct va_format *)ptr)->fmt,
-                                      *(((struct va_format *)ptr)->va));
+               {
+                       va_list va;
+
+                       va_copy(va, *((struct va_format *)ptr)->va);
+                       buf += vsnprintf(buf, end > buf ? end - buf : 0,
+                                        ((struct va_format *)ptr)->fmt, va);
+                       va_end(va);
+                       return buf;
+               }
        case 'K':
                /*
                 * %pK cannot be used in IRQ context because its test
index 91d3efb..8f7fc39 100644 (file)
@@ -671,6 +671,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
                set_pmd_at(mm, haddr, pmd, entry);
                prepare_pmd_huge_pte(pgtable, mm);
                add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
+               mm->nr_ptes++;
                spin_unlock(&mm->page_table_lock);
        }
 
@@ -789,6 +790,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        pmd = pmd_mkold(pmd_wrprotect(pmd));
        set_pmd_at(dst_mm, addr, dst_pmd, pmd);
        prepare_pmd_huge_pte(pgtable, dst_mm);
+       dst_mm->nr_ptes++;
 
        ret = 0;
 out_unlock:
@@ -887,7 +889,6 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
        }
        kfree(pages);
 
-       mm->nr_ptes++;
        smp_wmb(); /* make pte visible before pmd */
        pmd_populate(mm, pmd, pgtable);
        page_remove_rmap(page);
@@ -1047,6 +1048,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                        VM_BUG_ON(page_mapcount(page) < 0);
                        add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
                        VM_BUG_ON(!PageHead(page));
+                       tlb->mm->nr_ptes--;
                        spin_unlock(&tlb->mm->page_table_lock);
                        tlb_remove_page(tlb, page);
                        pte_free(tlb->mm, pgtable);
@@ -1375,7 +1377,6 @@ static int __split_huge_page_map(struct page *page,
                        pte_unmap(pte);
                }
 
-               mm->nr_ptes++;
                smp_wmb(); /* make pte visible before pmd */
                /*
                 * Up to this point the pmd is present and huge and
@@ -1988,7 +1989,6 @@ static void collapse_huge_page(struct mm_struct *mm,
        set_pmd_at(mm, address, pmd, _pmd);
        update_mmu_cache(vma, address, _pmd);
        prepare_pmd_huge_pte(pgtable, mm);
-       mm->nr_ptes--;
        spin_unlock(&mm->page_table_lock);
 
 #ifndef CONFIG_NUMA
index 5f34bd8..a876871 100644 (file)
@@ -2277,8 +2277,8 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
                        set_page_dirty(page);
                list_add(&page->lru, &page_list);
        }
-       spin_unlock(&mm->page_table_lock);
        flush_tlb_range(vma, start, end);
+       spin_unlock(&mm->page_table_lock);
        mmu_notifier_invalidate_range_end(mm, start, end);
        list_for_each_entry_safe(page, tmp, &page_list, lru) {
                page_remove_rmap(page);
index 1925ffb..310544a 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -28,7 +28,6 @@
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
-#include <linux/memcontrol.h>
 #include <linux/rbtree.h>
 #include <linux/memory.h>
 #include <linux/mmu_notifier.h>
@@ -1572,16 +1571,6 @@ struct page *ksm_does_need_to_copy(struct page *page,
 
        new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
        if (new_page) {
-               /*
-                * The memcg-specific accounting when moving
-                * pages around the LRU lists relies on the
-                * page's owner (memcg) to be valid.  Usually,
-                * pages are assigned to a new owner before
-                * being put on the LRU list, but since this
-                * is not the case here, the stale owner from
-                * a previous allocation cycle must be reset.
-                */
-               mem_cgroup_reset_owner(new_page);
                copy_user_highpage(new_page, page, address, vma);
 
                SetPageDirty(new_page);
index 228d646..58a08fc 100644 (file)
@@ -230,10 +230,30 @@ struct mem_cgroup {
         * the counter to account for memory usage
         */
        struct res_counter res;
-       /*
-        * the counter to account for mem+swap usage.
-        */
-       struct res_counter memsw;
+
+       union {
+               /*
+                * the counter to account for mem+swap usage.
+                */
+               struct res_counter memsw;
+
+               /*
+                * rcu_freeing is used only when freeing struct mem_cgroup,
+                * so put it into a union to avoid wasting more memory.
+                * It must be disjoint from the css field.  It could be
+                * in a union with the res field, but res plays a much
+                * larger part in mem_cgroup life than memsw, and might
+                * be of interest, even at time of free, when debugging.
+                * So share rcu_head with the less interesting memsw.
+                */
+               struct rcu_head rcu_freeing;
+               /*
+                * But when using vfree(), that cannot be done at
+                * interrupt time, so we must then queue the work.
+                */
+               struct work_struct work_freeing;
+       };
+
        /*
         * Per cgroup active and inactive list, similar to the
         * per zone LRU lists.
@@ -1042,6 +1062,19 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
 
        pc = lookup_page_cgroup(page);
        memcg = pc->mem_cgroup;
+
+       /*
+        * Surreptitiously switch any uncharged page to root:
+        * an uncharged page off lru does nothing to secure
+        * its former mem_cgroup from sudden removal.
+        *
+        * Our caller holds lru_lock, and PageCgroupUsed is updated
+        * under page_cgroup lock: between them, they make all uses
+        * of pc->mem_cgroup safe.
+        */
+       if (!PageCgroupUsed(pc) && memcg != root_mem_cgroup)
+               pc->mem_cgroup = memcg = root_mem_cgroup;
+
        mz = page_cgroup_zoneinfo(memcg, page);
        /* compound_order() is stabilized through lru_lock */
        MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
@@ -2408,8 +2441,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
                                       struct page *page,
                                       unsigned int nr_pages,
                                       struct page_cgroup *pc,
-                                      enum charge_type ctype)
+                                      enum charge_type ctype,
+                                      bool lrucare)
 {
+       struct zone *uninitialized_var(zone);
+       bool was_on_lru = false;
+
        lock_page_cgroup(pc);
        if (unlikely(PageCgroupUsed(pc))) {
                unlock_page_cgroup(pc);
@@ -2420,6 +2457,21 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
         * we don't need page_cgroup_lock about tail pages, becase they are not
         * accessed by any other context at this point.
         */
+
+       /*
+        * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
+        * may already be on some other mem_cgroup's LRU.  Take care of it.
+        */
+       if (lrucare) {
+               zone = page_zone(page);
+               spin_lock_irq(&zone->lru_lock);
+               if (PageLRU(page)) {
+                       ClearPageLRU(page);
+                       del_page_from_lru_list(zone, page, page_lru(page));
+                       was_on_lru = true;
+               }
+       }
+
        pc->mem_cgroup = memcg;
        /*
         * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2443,9 +2495,18 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
                break;
        }
 
+       if (lrucare) {
+               if (was_on_lru) {
+                       VM_BUG_ON(PageLRU(page));
+                       SetPageLRU(page);
+                       add_page_to_lru_list(zone, page, page_lru(page));
+               }
+               spin_unlock_irq(&zone->lru_lock);
+       }
+
        mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
        unlock_page_cgroup(pc);
-       WARN_ON_ONCE(PageLRU(page));
+
        /*
         * "charge_statistics" updated event counter. Then, check it.
         * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
@@ -2643,7 +2704,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
        ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
        if (ret == -ENOMEM)
                return ret;
-       __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
+       __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype, false);
        return 0;
 }
 
@@ -2663,35 +2724,6 @@ static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
                                        enum charge_type ctype);
 
-static void
-__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
-                                       enum charge_type ctype)
-{
-       struct page_cgroup *pc = lookup_page_cgroup(page);
-       struct zone *zone = page_zone(page);
-       unsigned long flags;
-       bool removed = false;
-
-       /*
-        * In some case, SwapCache, FUSE(splice_buf->radixtree), the page
-        * is already on LRU. It means the page may on some other page_cgroup's
-        * LRU. Take care of it.
-        */
-       spin_lock_irqsave(&zone->lru_lock, flags);
-       if (PageLRU(page)) {
-               del_page_from_lru_list(zone, page, page_lru(page));
-               ClearPageLRU(page);
-               removed = true;
-       }
-       __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
-       if (removed) {
-               add_page_to_lru_list(zone, page, page_lru(page));
-               SetPageLRU(page);
-       }
-       spin_unlock_irqrestore(&zone->lru_lock, flags);
-       return;
-}
-
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask)
 {
@@ -2769,13 +2801,16 @@ static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
                                        enum charge_type ctype)
 {
+       struct page_cgroup *pc;
+
        if (mem_cgroup_disabled())
                return;
        if (!memcg)
                return;
        cgroup_exclude_rmdir(&memcg->css);
 
-       __mem_cgroup_commit_charge_lrucare(page, memcg, ctype);
+       pc = lookup_page_cgroup(page);
+       __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype, true);
        /*
         * Now swap is on-memory. This means this page may be
         * counted both as mem and swap....double count.
@@ -3027,23 +3062,6 @@ void mem_cgroup_uncharge_end(void)
        batch->memcg = NULL;
 }
 
-/*
- * A function for resetting pc->mem_cgroup for newly allocated pages.
- * This function should be called if the newpage will be added to LRU
- * before start accounting.
- */
-void mem_cgroup_reset_owner(struct page *newpage)
-{
-       struct page_cgroup *pc;
-
-       if (mem_cgroup_disabled())
-               return;
-
-       pc = lookup_page_cgroup(newpage);
-       VM_BUG_ON(PageCgroupUsed(pc));
-       pc->mem_cgroup = root_mem_cgroup;
-}
-
 #ifdef CONFIG_SWAP
 /*
  * called after __delete_from_swap_cache() and drop "page" account.
@@ -3248,7 +3266,7 @@ int mem_cgroup_prepare_migration(struct page *page,
                ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
        else
                ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-       __mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype);
+       __mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype, false);
        return ret;
 }
 
@@ -3332,7 +3350,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
         * the newpage may be on LRU(or pagevec for LRU) already. We lock
         * LRU while we overwrite pc->mem_cgroup.
         */
-       __mem_cgroup_commit_charge_lrucare(newpage, memcg, type);
+       __mem_cgroup_commit_charge(memcg, newpage, 1, pc, type, true);
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -4782,6 +4800,27 @@ out_free:
 }
 
 /*
+ * Helpers for freeing a vzalloc()ed mem_cgroup by RCU,
+ * but in process context.  The work_freeing structure is overlaid
+ * on the rcu_freeing structure, which itself is overlaid on memsw.
+ */
+static void vfree_work(struct work_struct *work)
+{
+       struct mem_cgroup *memcg;
+
+       memcg = container_of(work, struct mem_cgroup, work_freeing);
+       vfree(memcg);
+}
+static void vfree_rcu(struct rcu_head *rcu_head)
+{
+       struct mem_cgroup *memcg;
+
+       memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing);
+       INIT_WORK(&memcg->work_freeing, vfree_work);
+       schedule_work(&memcg->work_freeing);
+}
+
+/*
  * At destroying mem_cgroup, references from swap_cgroup can remain.
  * (scanning all at force_empty is too costly...)
  *
@@ -4804,9 +4843,9 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 
        free_percpu(memcg->stat);
        if (sizeof(struct mem_cgroup) < PAGE_SIZE)
-               kfree(memcg);
+               kfree_rcu(memcg, rcu_freeing);
        else
-               vfree(memcg);
+               call_rcu(&memcg->rcu_freeing, vfree_rcu);
 }
 
 static void mem_cgroup_get(struct mem_cgroup *memcg)
index 06b145f..47296fe 100644 (file)
@@ -640,10 +640,11 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
        unsigned long vmstart;
        unsigned long vmend;
 
-       vma = find_vma_prev(mm, start, &prev);
+       vma = find_vma(mm, start);
        if (!vma || vma->vm_start > start)
                return -EFAULT;
 
+       prev = vma->vm_prev;
        if (start > vma->vm_start)
                prev = vma;
 
index df141f6..1503b6b 100644 (file)
@@ -839,8 +839,6 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
        if (!newpage)
                return -ENOMEM;
 
-       mem_cgroup_reset_owner(newpage);
-
        if (page_count(page) == 1) {
                /* page was freed from under us. So we are done. */
                goto out;
index 4f4f53b..ef726e8 100644 (file)
@@ -385,10 +385,11 @@ static int do_mlock(unsigned long start, size_t len, int on)
                return -EINVAL;
        if (end == start)
                return 0;
-       vma = find_vma_prev(current->mm, start, &prev);
+       vma = find_vma(current->mm, start);
        if (!vma || vma->vm_start > start)
                return -ENOMEM;
 
+       prev = vma->vm_prev;
        if (start > vma->vm_start)
                prev = vma;
 
index 3f758c7..da15a79 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1266,8 +1266,9 @@ munmap_back:
        vma->vm_pgoff = pgoff;
        INIT_LIST_HEAD(&vma->anon_vma_chain);
 
+       error = -EINVAL;        /* when rejecting VM_GROWSDOWN|VM_GROWSUP */
+
        if (file) {
-               error = -EINVAL;
                if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
                        goto free_vma;
                if (vm_flags & VM_DENYWRITE) {
@@ -1293,6 +1294,8 @@ munmap_back:
                pgoff = vma->vm_pgoff;
                vm_flags = vma->vm_flags;
        } else if (vm_flags & VM_SHARED) {
+               if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP)))
+                       goto free_vma;
                error = shmem_zero_setup(vma);
                if (error)
                        goto free_vma;
@@ -1605,7 +1608,6 @@ EXPORT_SYMBOL(find_vma);
 
 /*
  * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
- * Note: pprev is set to NULL when return value is NULL.
  */
 struct vm_area_struct *
 find_vma_prev(struct mm_struct *mm, unsigned long addr,
@@ -1614,7 +1616,16 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
        struct vm_area_struct *vma;
 
        vma = find_vma(mm, addr);
-       *pprev = vma ? vma->vm_prev : NULL;
+       if (vma) {
+               *pprev = vma->vm_prev;
+       } else {
+               struct rb_node *rb_node = mm->mm_rb.rb_node;
+               *pprev = NULL;
+               while (rb_node) {
+                       *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+                       rb_node = rb_node->rb_right;
+               }
+       }
        return vma;
 }
 
index 5a688a2..f437d05 100644 (file)
@@ -262,10 +262,11 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
 
        down_write(&current->mm->mmap_sem);
 
-       vma = find_vma_prev(current->mm, start, &prev);
+       vma = find_vma(current->mm, start);
        error = -ENOMEM;
        if (!vma)
                goto out;
+       prev = vma->vm_prev;
        if (unlikely(grows & PROT_GROWSDOWN)) {
                if (vma->vm_start >= end)
                        goto out;
index de1616a..1ccbd71 100644 (file)
@@ -379,13 +379,15 @@ static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
        pgoff_t offset = swp_offset(ent);
        struct swap_cgroup_ctrl *ctrl;
        struct page *mappage;
+       struct swap_cgroup *sc;
 
        ctrl = &swap_cgroup_ctrl[swp_type(ent)];
        if (ctrlp)
                *ctrlp = ctrl;
 
        mappage = ctrl->map[offset / SC_PER_PAGE];
-       return page_address(mappage) + offset % SC_PER_PAGE;
+       sc = page_address(mappage);
+       return sc + offset % SC_PER_PAGE;
 }
 
 /**
index 12a48a8..405d331 100644 (file)
@@ -184,8 +184,7 @@ static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
                                   page_end - page_start);
        }
 
-       for (i = page_start; i < page_end; i++)
-               __clear_bit(i, populated);
+       bitmap_clear(populated, page_start, page_end - page_start);
 }
 
 /**
index fff1ff7..14380e9 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -652,7 +652,7 @@ EXPORT_SYMBOL(__pagevec_release);
 void lru_add_page_tail(struct zone* zone,
                       struct page *page, struct page *page_tail)
 {
-       int active;
+       int uninitialized_var(active);
        enum lru_list lru;
        const int file = 0;
 
@@ -672,7 +672,6 @@ void lru_add_page_tail(struct zone* zone,
                        active = 0;
                        lru = LRU_INACTIVE_ANON;
                }
-               update_page_reclaim_stat(zone, page_tail, file, active);
        } else {
                SetPageUnevictable(page_tail);
                lru = LRU_UNEVICTABLE;
@@ -693,6 +692,9 @@ void lru_add_page_tail(struct zone* zone,
                list_head = page_tail->lru.prev;
                list_move_tail(&page_tail->lru, list_head);
        }
+
+       if (!PageUnevictable(page))
+               update_page_reclaim_stat(zone, page_tail, file, active);
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
@@ -710,8 +712,8 @@ static void __pagevec_lru_add_fn(struct page *page, void *arg)
        SetPageLRU(page);
        if (active)
                SetPageActive(page);
-       update_page_reclaim_stat(zone, page, file, active);
        add_page_to_lru_list(zone, page, lru);
+       update_page_reclaim_stat(zone, page, file, active);
 }
 
 /*
index 470038a..ea6b32d 100644 (file)
@@ -300,16 +300,6 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                        new_page = alloc_page_vma(gfp_mask, vma, addr);
                        if (!new_page)
                                break;          /* Out of memory */
-                       /*
-                        * The memcg-specific accounting when moving
-                        * pages around the LRU lists relies on the
-                        * page's owner (memcg) to be valid.  Usually,
-                        * pages are assigned to a new owner before
-                        * being put on the LRU list, but since this
-                        * is not the case here, the stale owner from
-                        * a previous allocation cycle must be reset.
-                        */
-                       mem_cgroup_reset_owner(new_page);
                }
 
                /*
index 568d5bf..702a1ae 100644 (file)
@@ -446,8 +446,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
        ip6h->nexthdr = IPPROTO_HOPOPTS;
        ip6h->hop_limit = 1;
        ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
-       ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
-                          &ip6h->saddr);
+       if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
+                              &ip6h->saddr)) {
+               kfree_skb(skb);
+               return NULL;
+       }
        ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
 
        hopopt = (u8 *)(ip6h + 1);
index 8412247..dec4f38 100644 (file)
@@ -62,6 +62,15 @@ static int brnf_filter_pppoe_tagged __read_mostly = 0;
 #define brnf_filter_pppoe_tagged 0
 #endif
 
+#define IS_IP(skb) \
+       (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IP))
+
+#define IS_IPV6(skb) \
+       (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6))
+
+#define IS_ARP(skb) \
+       (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_ARP))
+
 static inline __be16 vlan_proto(const struct sk_buff *skb)
 {
        if (vlan_tx_tag_present(skb))
@@ -639,8 +648,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
                return NF_DROP;
        br = p->br;
 
-       if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
-           IS_PPPOE_IPV6(skb)) {
+       if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) {
                if (!brnf_call_ip6tables && !br->nf_call_ip6tables)
                        return NF_ACCEPT;
 
@@ -651,8 +659,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
        if (!brnf_call_iptables && !br->nf_call_iptables)
                return NF_ACCEPT;
 
-       if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) &&
-           !IS_PPPOE_IP(skb))
+       if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb))
                return NF_ACCEPT;
 
        nf_bridge_pull_encap_header_rcsum(skb);
@@ -701,7 +708,7 @@ static int br_nf_forward_finish(struct sk_buff *skb)
        struct nf_bridge_info *nf_bridge = skb->nf_bridge;
        struct net_device *in;
 
-       if (skb->protocol != htons(ETH_P_ARP) && !IS_VLAN_ARP(skb)) {
+       if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
                in = nf_bridge->physindev;
                if (nf_bridge->mask & BRNF_PKT_TYPE) {
                        skb->pkt_type = PACKET_OTHERHOST;
@@ -718,6 +725,7 @@ static int br_nf_forward_finish(struct sk_buff *skb)
        return 0;
 }
 
+
 /* This is the 'purely bridged' case.  For IP, we pass the packet to
  * netfilter with indev and outdev set to the bridge device,
  * but we are still able to filter on the 'real' indev/outdev
@@ -744,11 +752,9 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
        if (!parent)
                return NF_DROP;
 
-       if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
-           IS_PPPOE_IP(skb))
+       if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
                pf = PF_INET;
-       else if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
-                IS_PPPOE_IPV6(skb))
+       else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
                pf = PF_INET6;
        else
                return NF_ACCEPT;
@@ -795,7 +801,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
        if (!brnf_call_arptables && !br->nf_call_arptables)
                return NF_ACCEPT;
 
-       if (skb->protocol != htons(ETH_P_ARP)) {
+       if (!IS_ARP(skb)) {
                if (!IS_VLAN_ARP(skb))
                        return NF_ACCEPT;
                nf_bridge_pull_encap_header(skb);
@@ -853,11 +859,9 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
        if (!realoutdev)
                return NF_DROP;
 
-       if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
-           IS_PPPOE_IP(skb))
+       if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
                pf = PF_INET;
-       else if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
-                IS_PPPOE_IPV6(skb))
+       else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
                pf = PF_INET6;
        else
                return NF_ACCEPT;
index dd147d7..8c836d9 100644 (file)
@@ -17,9 +17,9 @@
 #include "br_private_stp.h"
 
 /* since time values in bpdu are in jiffies and then scaled (1/256)
- * before sending, make sure that is at least one.
+ * before sending, make sure the increment is at least one STP tick.
  */
-#define MESSAGE_AGE_INCR       ((HZ < 256) ? 1 : (HZ/256))
+#define MESSAGE_AGE_INCR       ((HZ / 256) + 1)
 
 static const char *const br_port_state_names[] = {
        [BR_STATE_DISABLED] = "disabled",
@@ -31,7 +31,7 @@ static const char *const br_port_state_names[] = {
 
 void br_log_state(const struct net_bridge_port *p)
 {
-       br_info(p->br, "port %u(%s) entering %s state\n",
+       br_info(p->br, "port %u(%s) entered %s state\n",
                (unsigned) p->port_no, p->dev->name,
                br_port_state_names[p->state]);
 }
@@ -186,7 +186,7 @@ static void br_record_config_information(struct net_bridge_port *p,
        p->designated_cost = bpdu->root_path_cost;
        p->designated_bridge = bpdu->bridge_id;
        p->designated_port = bpdu->port_id;
-       p->designated_age = jiffies + bpdu->message_age;
+       p->designated_age = jiffies - bpdu->message_age;
 
        mod_timer(&p->message_age_timer, jiffies
                  + (p->br->max_age - bpdu->message_age));
index 19308e3..f494496 100644 (file)
@@ -98,14 +98,13 @@ void br_stp_disable_port(struct net_bridge_port *p)
        struct net_bridge *br = p->br;
        int wasroot;
 
-       br_log_state(p);
-
        wasroot = br_is_root_bridge(br);
        br_become_designated_port(p);
        p->state = BR_STATE_DISABLED;
        p->topology_change_ack = 0;
        p->config_pending = 0;
 
+       br_log_state(p);
        br_ifinfo_notify(RTM_NEWLINK, p);
 
        del_timer(&p->message_age_timer);
index 5864cc4..5fe2ff3 100644 (file)
@@ -1335,7 +1335,12 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m,
     const char *base, char __user *ubase)
 {
        char __user *hlp = ubase + ((char *)m - base);
-       if (copy_to_user(hlp, m->u.match->name, EBT_FUNCTION_MAXNAMELEN))
+       char name[EBT_FUNCTION_MAXNAMELEN] = {};
+
+       /* ebtables expects 32-byte names but xt_match names are only 29 bytes
+          long. Copy the name and fill the remaining bytes with zeroes. */
+       strncpy(name, m->u.match->name, sizeof(name));
+       if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
                return -EFAULT;
        return 0;
 }
@@ -1344,7 +1349,10 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w,
     const char *base, char __user *ubase)
 {
        char __user *hlp = ubase + ((char *)w - base);
-       if (copy_to_user(hlp , w->u.watcher->name, EBT_FUNCTION_MAXNAMELEN))
+       char name[EBT_FUNCTION_MAXNAMELEN] = {};
+
+       strncpy(name, w->u.watcher->name, sizeof(name));
+       if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN))
                return -EFAULT;
        return 0;
 }
@@ -1355,6 +1363,7 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase)
        int ret;
        char __user *hlp;
        const struct ebt_entry_target *t;
+       char name[EBT_FUNCTION_MAXNAMELEN] = {};
 
        if (e->bitmask == 0)
                return 0;
@@ -1368,7 +1377,8 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase)
        ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase);
        if (ret != 0)
                return ret;
-       if (copy_to_user(hlp, t->u.target->name, EBT_FUNCTION_MAXNAMELEN))
+       strncpy(name, t->u.target->name, sizeof(name));
+       if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
                return -EFAULT;
        return 0;
 }
@@ -1893,10 +1903,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
 
        switch (compat_mwt) {
        case EBT_COMPAT_MATCH:
-               match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE,
-                                               name, 0), "ebt_%s", name);
-               if (match == NULL)
-                       return -ENOENT;
+               match = xt_request_find_match(NFPROTO_BRIDGE, name, 0);
                if (IS_ERR(match))
                        return PTR_ERR(match);
 
@@ -1915,10 +1922,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
                break;
        case EBT_COMPAT_WATCHER: /* fallthrough */
        case EBT_COMPAT_TARGET:
-               wt = try_then_request_module(xt_find_target(NFPROTO_BRIDGE,
-                                               name, 0), "ebt_%s", name);
-               if (wt == NULL)
-                       return -ENOENT;
+               wt = xt_request_find_target(NFPROTO_BRIDGE, name, 0);
                if (IS_ERR(wt))
                        return PTR_ERR(wt);
                off = xt_compat_target_offset(wt);
index 6ca32f6..6982bfd 100644 (file)
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
 #include <linux/net_tstamp.h>
-#include <linux/jump_label.h>
+#include <linux/static_key.h>
 #include <net/flow_keys.h>
 
 #include "net-sysfs.h"
@@ -1441,11 +1441,11 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 }
 EXPORT_SYMBOL(call_netdevice_notifiers);
 
-static struct jump_label_key netstamp_needed __read_mostly;
+static struct static_key netstamp_needed __read_mostly;
 #ifdef HAVE_JUMP_LABEL
-/* We are not allowed to call jump_label_dec() from irq context
+/* We are not allowed to call static_key_slow_dec() from irq context
  * If net_disable_timestamp() is called from irq context, defer the
- * jump_label_dec() calls.
+ * static_key_slow_dec() calls.
  */
 static atomic_t netstamp_needed_deferred;
 #endif
@@ -1457,12 +1457,12 @@ void net_enable_timestamp(void)
 
        if (deferred) {
                while (--deferred)
-                       jump_label_dec(&netstamp_needed);
+                       static_key_slow_dec(&netstamp_needed);
                return;
        }
 #endif
        WARN_ON(in_interrupt());
-       jump_label_inc(&netstamp_needed);
+       static_key_slow_inc(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_enable_timestamp);
 
@@ -1474,19 +1474,19 @@ void net_disable_timestamp(void)
                return;
        }
 #endif
-       jump_label_dec(&netstamp_needed);
+       static_key_slow_dec(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_disable_timestamp);
 
 static inline void net_timestamp_set(struct sk_buff *skb)
 {
        skb->tstamp.tv64 = 0;
-       if (static_branch(&netstamp_needed))
+       if (static_key_false(&netstamp_needed))
                __net_timestamp(skb);
 }
 
 #define net_timestamp_check(COND, SKB)                 \
-       if (static_branch(&netstamp_needed)) {          \
+       if (static_key_false(&netstamp_needed)) {               \
                if ((COND) && !(SKB)->tstamp.tv64)      \
                        __net_timestamp(SKB);           \
        }                                               \
@@ -2660,7 +2660,7 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
-struct jump_label_key rps_needed __read_mostly;
+struct static_key rps_needed __read_mostly;
 
 static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
@@ -2945,7 +2945,7 @@ int netif_rx(struct sk_buff *skb)
 
        trace_netif_rx(skb);
 #ifdef CONFIG_RPS
-       if (static_branch(&rps_needed)) {
+       if (static_key_false(&rps_needed)) {
                struct rps_dev_flow voidflow, *rflow = &voidflow;
                int cpu;
 
@@ -3309,7 +3309,7 @@ int netif_receive_skb(struct sk_buff *skb)
                return NET_RX_SUCCESS;
 
 #ifdef CONFIG_RPS
-       if (static_branch(&rps_needed)) {
+       if (static_key_false(&rps_needed)) {
                struct rps_dev_flow voidflow, *rflow = &voidflow;
                int cpu, ret;
 
index a1727cd..4955862 100644 (file)
@@ -608,10 +608,10 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
        spin_unlock(&rps_map_lock);
 
        if (map)
-               jump_label_inc(&rps_needed);
+               static_key_slow_inc(&rps_needed);
        if (old_map) {
                kfree_rcu(old_map, rcu);
-               jump_label_dec(&rps_needed);
+               static_key_slow_dec(&rps_needed);
        }
        free_cpumask_var(mask);
        return len;
index 606a6e8..f965dce 100644 (file)
@@ -1060,11 +1060,12 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
        rcu_read_lock();
        cb->seq = net->dev_base_seq;
 
-       nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
-                   ifla_policy);
+       if (nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
+                       ifla_policy) >= 0) {
 
-       if (tb[IFLA_EXT_MASK])
-               ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+               if (tb[IFLA_EXT_MASK])
+                       ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+       }
 
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
@@ -1900,10 +1901,11 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
        u32 ext_filter_mask = 0;
        u16 min_ifinfo_dump_size = 0;
 
-       nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, ifla_policy);
-
-       if (tb[IFLA_EXT_MASK])
-               ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+       if (nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
+                       ifla_policy) >= 0) {
+               if (tb[IFLA_EXT_MASK])
+                       ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+       }
 
        if (!ext_filter_mask)
                return NLMSG_GOODSIZE;
index 02f8dfe..95aff9c 100644 (file)
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/user_namespace.h>
-#include <linux/jump_label.h>
+#include <linux/static_key.h>
 #include <linux/memcontrol.h>
 
 #include <asm/uaccess.h>
@@ -184,7 +184,7 @@ void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
 static struct lock_class_key af_family_keys[AF_MAX];
 static struct lock_class_key af_family_slock_keys[AF_MAX];
 
-struct jump_label_key memcg_socket_limit_enabled;
+struct static_key memcg_socket_limit_enabled;
 EXPORT_SYMBOL(memcg_socket_limit_enabled);
 
 /*
index d05559d..0c28508 100644 (file)
@@ -69,9 +69,9 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
                if (sock_table != orig_sock_table) {
                        rcu_assign_pointer(rps_sock_flow_table, sock_table);
                        if (sock_table)
-                               jump_label_inc(&rps_needed);
+                               static_key_slow_inc(&rps_needed);
                        if (orig_sock_table) {
-                               jump_label_dec(&rps_needed);
+                               static_key_slow_dec(&rps_needed);
                                synchronize_rcu();
                                vfree(orig_sock_table);
                        }
index 86f3b88..c48adc5 100644 (file)
@@ -1857,11 +1857,6 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
        return CIPSO_V4_HDR_LEN + ret_val;
 }
 
-static void opt_kfree_rcu(struct rcu_head *head)
-{
-       kfree(container_of(head, struct ip_options_rcu, rcu));
-}
-
 /**
  * cipso_v4_sock_setattr - Add a CIPSO option to a socket
  * @sk: the socket
@@ -1938,7 +1933,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
        }
        rcu_assign_pointer(sk_inet->inet_opt, opt);
        if (old)
-               call_rcu(&old->rcu, opt_kfree_rcu);
+               kfree_rcu(old, rcu);
 
        return 0;
 
@@ -2005,7 +2000,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
        req_inet = inet_rsk(req);
        opt = xchg(&req_inet->opt, opt);
        if (opt)
-               call_rcu(&opt->rcu, opt_kfree_rcu);
+               kfree_rcu(opt, rcu);
 
        return 0;
 
@@ -2075,7 +2070,7 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
                 * remove the entire option struct */
                *opt_ptr = NULL;
                hdr_delta = opt->opt.optlen;
-               call_rcu(&opt->rcu, opt_kfree_rcu);
+               kfree_rcu(opt, rcu);
        }
 
        return hdr_delta;
index bf4a9c4..d4d61b6 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/net.h>
+#include <linux/workqueue.h>
 #include <net/ip.h>
 #include <net/inetpeer.h>
 #include <net/secure_seq.h>
 
 static struct kmem_cache *peer_cachep __read_mostly;
 
+static LIST_HEAD(gc_list);
+static const int gc_delay = 60 * HZ;
+static struct delayed_work gc_work;
+static DEFINE_SPINLOCK(gc_lock);
+
 #define node_height(x) x->avl_height
 
 #define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
@@ -102,6 +108,50 @@ int inet_peer_threshold __read_mostly = 65536 + 128;       /* start to throw entries m
 int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
 int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;     /* usual time to live: 10 min */
 
+static void inetpeer_gc_worker(struct work_struct *work)
+{
+       struct inet_peer *p, *n;
+       LIST_HEAD(list);
+
+       spin_lock_bh(&gc_lock);
+       list_replace_init(&gc_list, &list);
+       spin_unlock_bh(&gc_lock);
+
+       if (list_empty(&list))
+               return;
+
+       list_for_each_entry_safe(p, n, &list, gc_list) {
+
+               if(need_resched())
+                       cond_resched();
+
+               if (p->avl_left != peer_avl_empty) {
+                       list_add_tail(&p->avl_left->gc_list, &list);
+                       p->avl_left = peer_avl_empty;
+               }
+
+               if (p->avl_right != peer_avl_empty) {
+                       list_add_tail(&p->avl_right->gc_list, &list);
+                       p->avl_right = peer_avl_empty;
+               }
+
+               n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
+
+               if (!atomic_read(&p->refcnt)) {
+                       list_del(&p->gc_list);
+                       kmem_cache_free(peer_cachep, p);
+               }
+       }
+
+       if (list_empty(&list))
+               return;
+
+       spin_lock_bh(&gc_lock);
+       list_splice(&list, &gc_list);
+       spin_unlock_bh(&gc_lock);
+
+       schedule_delayed_work(&gc_work, gc_delay);
+}
 
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
@@ -126,6 +176,7 @@ void __init inet_initpeers(void)
                        0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                        NULL);
 
+       INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker);
 }
 
 static int addr_compare(const struct inetpeer_addr *a,
@@ -447,9 +498,8 @@ relookup:
                p->rate_last = 0;
                p->pmtu_expires = 0;
                p->pmtu_orig = 0;
-               p->redirect_genid = 0;
                memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
-
+               INIT_LIST_HEAD(&p->gc_list);
 
                /* Link the node. */
                link_to_pool(p, base);
@@ -509,3 +559,30 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
        return rc;
 }
 EXPORT_SYMBOL(inet_peer_xrlim_allow);
+
+void inetpeer_invalidate_tree(int family)
+{
+       struct inet_peer *old, *new, *prev;
+       struct inet_peer_base *base = family_to_base(family);
+
+       write_seqlock_bh(&base->lock);
+
+       old = base->root;
+       if (old == peer_avl_empty_rcu)
+               goto out;
+
+       new = peer_avl_empty_rcu;
+
+       prev = cmpxchg(&base->root, old, new);
+       if (prev == old) {
+               base->total = 0;
+               spin_lock(&gc_lock);
+               list_add_tail(&prev->gc_list, &gc_list);
+               spin_unlock(&gc_lock);
+               schedule_delayed_work(&gc_work, gc_delay);
+       }
+
+out:
+       write_sequnlock_bh(&base->lock);
+}
+EXPORT_SYMBOL(inetpeer_invalidate_tree);
index 8aa87c1..5343d9a 100644 (file)
@@ -445,11 +445,6 @@ out:
 }
 
 
-static void opt_kfree_rcu(struct rcu_head *head)
-{
-       kfree(container_of(head, struct ip_options_rcu, rcu));
-}
-
 /*
  *     Socket option code for IP. This is the end of the line after any
  *     TCP,UDP etc options on an IP socket.
@@ -525,7 +520,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
                }
                rcu_assign_pointer(inet->inet_opt, opt);
                if (old)
-                       call_rcu(&old->rcu, opt_kfree_rcu);
+                       kfree_rcu(old, rcu);
                break;
        }
        case IP_PKTINFO:
index bcacf54..0197747 100644 (file)
@@ -132,7 +132,6 @@ static int ip_rt_mtu_expires __read_mostly  = 10 * 60 * HZ;
 static int ip_rt_min_pmtu __read_mostly                = 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly      = 256;
 static int rt_chain_length_max __read_mostly   = 20;
-static int redirect_genid;
 
 static struct delayed_work expires_work;
 static unsigned long expires_ljiffies;
@@ -937,7 +936,7 @@ static void rt_cache_invalidate(struct net *net)
 
        get_random_bytes(&shuffle, sizeof(shuffle));
        atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
-       redirect_genid++;
+       inetpeer_invalidate_tree(AF_INET);
 }
 
 /*
@@ -1485,10 +1484,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
                                peer = rt->peer;
                                if (peer) {
-                                       if (peer->redirect_learned.a4 != new_gw ||
-                                           peer->redirect_genid != redirect_genid) {
+                                       if (peer->redirect_learned.a4 != new_gw) {
                                                peer->redirect_learned.a4 = new_gw;
-                                               peer->redirect_genid = redirect_genid;
                                                atomic_inc(&__rt_peer_genid);
                                        }
                                        check_peer_redir(&rt->dst, peer);
@@ -1793,8 +1790,6 @@ static void ipv4_validate_peer(struct rtable *rt)
                if (peer) {
                        check_peer_pmtu(&rt->dst, peer);
 
-                       if (peer->redirect_genid != redirect_genid)
-                               peer->redirect_learned.a4 = 0;
                        if (peer->redirect_learned.a4 &&
                            peer->redirect_learned.a4 != rt->rt_gateway)
                                check_peer_redir(&rt->dst, peer);
@@ -1958,8 +1953,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
                dst_init_metrics(&rt->dst, peer->metrics, false);
 
                check_peer_pmtu(&rt->dst, peer);
-               if (peer->redirect_genid != redirect_genid)
-                       peer->redirect_learned.a4 = 0;
+
                if (peer->redirect_learned.a4 &&
                    peer->redirect_learned.a4 != rt->rt_gateway) {
                        rt->rt_gateway = peer->redirect_learned.a4;
index 51fdbb4..eab2a7f 100644 (file)
@@ -278,6 +278,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
        struct rtable *rt;
        __u8 rcv_wscale;
        bool ecn_ok = false;
+       struct flowi4 fl4;
 
        if (!sysctl_tcp_syncookies || !th->ack || th->rst)
                goto out;
@@ -346,20 +347,16 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
         * hasn't changed since we received the original syn, but I see
         * no easy way to do this.
         */
-       {
-               struct flowi4 fl4;
-
-               flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk),
-                                  RT_SCOPE_UNIVERSE, IPPROTO_TCP,
-                                  inet_sk_flowi_flags(sk),
-                                  (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
-                                  ireq->loc_addr, th->source, th->dest);
-               security_req_classify_flow(req, flowi4_to_flowi(&fl4));
-               rt = ip_route_output_key(sock_net(sk), &fl4);
-               if (IS_ERR(rt)) {
-                       reqsk_free(req);
-                       goto out;
-               }
+       flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk),
+                          RT_SCOPE_UNIVERSE, IPPROTO_TCP,
+                          inet_sk_flowi_flags(sk),
+                          (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
+                          ireq->loc_addr, th->source, th->dest);
+       security_req_classify_flow(req, flowi4_to_flowi(&fl4));
+       rt = ip_route_output_key(sock_net(sk), &fl4);
+       if (IS_ERR(rt)) {
+               reqsk_free(req);
+               goto out;
        }
 
        /* Try to redo what tcp_v4_send_synack did. */
@@ -373,5 +370,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
        ireq->rcv_wscale  = rcv_wscale;
 
        ret = get_cookie_sock(sk, skb, req, &rt->dst);
+       /* ip_queue_xmit() depends on our flow being set up.
+        * Normal sockets get it right from inet_csk_route_child_sock().
+        */
+       if (ret)
+               inet_sk(ret)->cork.fl.u.ip4 = fl4;
 out:   return ret;
 }
index 53c8ce4..b5e315f 100644 (file)
@@ -1403,8 +1403,16 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 
        BUG_ON(!pcount);
 
-       /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */
-       if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint))
+       /* Adjust counters and hints for the newly sacked sequence
+        * range but discard the return value since prev is already
+        * marked. We must tag the range first because the seq
+        * advancement below implicitly advances
+        * tcp_highest_sack_seq() when skb is highest_sack.
+        */
+       tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
+                       start_seq, end_seq, dup_sack, pcount);
+
+       if (skb == tp->lost_skb_hint)
                tp->lost_cnt_hint += pcount;
 
        TCP_SKB_CB(prev)->end_seq += shifted;
@@ -1430,12 +1438,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
                skb_shinfo(skb)->gso_type = 0;
        }
 
-       /* Adjust counters and hints for the newly sacked sequence range but
-        * discard the return value since prev is already marked.
-        */
-       tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
-                       start_seq, end_seq, dup_sack, pcount);
-
        /* Difference in this won't matter, both ACKed by the same cumul. ACK */
        TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
 
@@ -1583,6 +1585,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
                }
        }
 
+       /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
+       if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
+               goto fallback;
+
        if (!skb_shift(prev, skb, len))
                goto fallback;
        if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
@@ -2567,6 +2573,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 
                if (cnt > packets) {
                        if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+                           (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
                            (oldcnt >= packets))
                                break;
 
index 94d683a..fd54c5f 100644 (file)
@@ -1466,9 +1466,13 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
        newinet->inet_id = newtp->write_seq ^ jiffies;
 
-       if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
-               goto put_and_exit;
-
+       if (!dst) {
+               dst = inet_csk_route_child_sock(sk, newsk, req);
+               if (!dst)
+                       goto put_and_exit;
+       } else {
+               /* syncookie case : see end of cookie_v4_check() */
+       }
        sk_setup_caps(newsk, dst);
 
        tcp_mtup_init(newsk);
index 4997878..602fb30 100644 (file)
@@ -111,7 +111,7 @@ void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
        val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
 
        if (val != RESOURCE_MAX)
-               jump_label_dec(&memcg_socket_limit_enabled);
+               static_key_slow_dec(&memcg_socket_limit_enabled);
 }
 EXPORT_SYMBOL(tcp_destroy_cgroup);
 
@@ -143,9 +143,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
                                             net->ipv4.sysctl_tcp_mem[i]);
 
        if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX)
-               jump_label_dec(&memcg_socket_limit_enabled);
+               static_key_slow_dec(&memcg_socket_limit_enabled);
        else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX)
-               jump_label_inc(&memcg_socket_limit_enabled);
+               static_key_slow_inc(&memcg_socket_limit_enabled);
 
        return 0;
 }
index c02280a..6b8ebc5 100644 (file)
@@ -434,6 +434,10 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
        /* Join all-node multicast group */
        ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
 
+       /* Join all-router multicast group if forwarding is set */
+       if (ndev->cnf.forwarding && dev && (dev->flags & IFF_MULTICAST))
+               ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
+
        return ndev;
 }
 
index b853f06..16c33e3 100644 (file)
@@ -257,7 +257,6 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
 
                if (rt) {
                        dev = rt->dst.dev;
-                       dev_hold(dev);
                        dst_release(&rt->dst);
                }
        } else
index 8c2e3ab..22b7664 100644 (file)
@@ -1077,7 +1077,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
        struct net *net = dev_net(dev);
 
        if (unlikely(!idev))
-               return NULL;
+               return ERR_PTR(-ENODEV);
 
        rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
        if (unlikely(!rt)) {
index 01a21c2..8e2137b 100644 (file)
@@ -1332,6 +1332,9 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
                hw_roc = true;
 
        list_for_each_entry(sdata, &local->interfaces, list) {
+               if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
+                   sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+                       continue;
                if (sdata->old_idle == sdata->vif.bss_conf.idle)
                        continue;
                if (!ieee80211_sdata_running(sdata))
index edf167e..30420bc 100644 (file)
@@ -413,12 +413,6 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data
        return NULL;
 }
 
-static void mesh_gate_node_reclaim(struct rcu_head *rp)
-{
-       struct mpath_node *node = container_of(rp, struct mpath_node, rcu);
-       kfree(node);
-}
-
 /**
  * mesh_path_add_gate - add the given mpath to a mesh gate to our path table
  * @mpath: gate path to add to table
@@ -479,7 +473,7 @@ static int mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath)
                if (gate->mpath == mpath) {
                        spin_lock_bh(&tbl->gates_lock);
                        hlist_del_rcu(&gate->list);
-                       call_rcu(&gate->rcu, mesh_gate_node_reclaim);
+                       kfree_rcu(gate, rcu);
                        spin_unlock_bh(&tbl->gates_lock);
                        mpath->sdata->u.mesh.num_gates--;
                        mpath->is_gate = false;
index ad64f4d..f9b8e81 100644 (file)
@@ -344,7 +344,7 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
        for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
                info->control.rates[i].idx = -1;
                info->control.rates[i].flags = 0;
-               info->control.rates[i].count = 1;
+               info->control.rates[i].count = 0;
        }
 
        if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
index b4e8ff0..e1b7e05 100644 (file)
@@ -56,7 +56,7 @@ struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
 EXPORT_SYMBOL(nf_hooks);
 
 #if defined(CONFIG_JUMP_LABEL)
-struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 EXPORT_SYMBOL(nf_hooks_needed);
 #endif
 
@@ -77,7 +77,7 @@ int nf_register_hook(struct nf_hook_ops *reg)
        list_add_rcu(&reg->list, elem->list.prev);
        mutex_unlock(&nf_hook_mutex);
 #if defined(CONFIG_JUMP_LABEL)
-       jump_label_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
+       static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
        return 0;
 }
@@ -89,7 +89,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
        list_del_rcu(&reg->list);
        mutex_unlock(&nf_hook_mutex);
 #if defined(CONFIG_JUMP_LABEL)
-       jump_label_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
+       static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
        synchronize_net();
 }
index ed86a3b..fa4b82c 100644 (file)
@@ -635,8 +635,12 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 
        if (del_timer(&ct->timeout)) {
                death_by_timeout((unsigned long)ct);
-               dropped = 1;
-               NF_CT_STAT_INC_ATOMIC(net, early_drop);
+               /* Check if we indeed killed this entry. Reliable event
+                  delivery may have inserted it into the dying list. */
+               if (test_bit(IPS_DYING_BIT, &ct->status)) {
+                       dropped = 1;
+                       NF_CT_STAT_INC_ATOMIC(net, early_drop);
+               }
        }
        nf_ct_put(ct);
        return dropped;
index 30c9d4c..b49da6c 100644 (file)
@@ -943,20 +943,21 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
                }
        }
 
-       if (nf_conntrack_event_report(IPCT_DESTROY, ct,
-                                     NETLINK_CB(skb).pid,
-                                     nlmsg_report(nlh)) < 0) {
+       if (del_timer(&ct->timeout)) {
+               if (nf_conntrack_event_report(IPCT_DESTROY, ct,
+                                             NETLINK_CB(skb).pid,
+                                             nlmsg_report(nlh)) < 0) {
+                       nf_ct_delete_from_lists(ct);
+                       /* we failed to report the event, try later */
+                       nf_ct_insert_dying_list(ct);
+                       nf_ct_put(ct);
+                       return 0;
+               }
+               /* death_by_timeout would report the event again */
+               set_bit(IPS_DYING_BIT, &ct->status);
                nf_ct_delete_from_lists(ct);
-               /* we failed to report the event, try later */
-               nf_ct_insert_dying_list(ct);
                nf_ct_put(ct);
-               return 0;
        }
-
-       /* death_by_timeout would report the event again */
-       set_bit(IPS_DYING_BIT, &ct->status);
-
-       nf_ct_kill(ct);
        nf_ct_put(ct);
 
        return 0;
@@ -1041,16 +1042,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
        if (!parse_nat_setup) {
 #ifdef CONFIG_MODULES
                rcu_read_unlock();
-               spin_unlock_bh(&nf_conntrack_lock);
                nfnl_unlock();
                if (request_module("nf-nat-ipv4") < 0) {
                        nfnl_lock();
-                       spin_lock_bh(&nf_conntrack_lock);
                        rcu_read_lock();
                        return -EOPNOTSUPP;
                }
                nfnl_lock();
-               spin_lock_bh(&nf_conntrack_lock);
                rcu_read_lock();
                if (nfnetlink_parse_nat_setup_hook)
                        return -EAGAIN;
index 2725d1b..48badff 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2011 Nicira Networks.
+ * Copyright (c) 2007-2012 Nicira Networks.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -145,9 +145,16 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
                        inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
                                                 *addr, new_addr, 1);
        } else if (nh->protocol == IPPROTO_UDP) {
-               if (likely(transport_len >= sizeof(struct udphdr)))
-                       inet_proto_csum_replace4(&udp_hdr(skb)->check, skb,
-                                                *addr, new_addr, 1);
+               if (likely(transport_len >= sizeof(struct udphdr))) {
+                       struct udphdr *uh = udp_hdr(skb);
+
+                       if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+                               inet_proto_csum_replace4(&uh->check, skb,
+                                                        *addr, new_addr, 1);
+                               if (!uh->check)
+                                       uh->check = CSUM_MANGLED_0;
+                       }
+               }
        }
 
        csum_replace4(&nh->check, *addr, new_addr);
@@ -197,8 +204,22 @@ static void set_tp_port(struct sk_buff *skb, __be16 *port,
        skb->rxhash = 0;
 }
 
-static int set_udp_port(struct sk_buff *skb,
-                       const struct ovs_key_udp *udp_port_key)
+static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
+{
+       struct udphdr *uh = udp_hdr(skb);
+
+       if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
+               set_tp_port(skb, port, new_port, &uh->check);
+
+               if (!uh->check)
+                       uh->check = CSUM_MANGLED_0;
+       } else {
+               *port = new_port;
+               skb->rxhash = 0;
+       }
+}
+
+static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
 {
        struct udphdr *uh;
        int err;
@@ -210,16 +231,15 @@ static int set_udp_port(struct sk_buff *skb,
 
        uh = udp_hdr(skb);
        if (udp_port_key->udp_src != uh->source)
-               set_tp_port(skb, &uh->source, udp_port_key->udp_src, &uh->check);
+               set_udp_port(skb, &uh->source, udp_port_key->udp_src);
 
        if (udp_port_key->udp_dst != uh->dest)
-               set_tp_port(skb, &uh->dest, udp_port_key->udp_dst, &uh->check);
+               set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
 
        return 0;
 }
 
-static int set_tcp_port(struct sk_buff *skb,
-                       const struct ovs_key_tcp *tcp_port_key)
+static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
 {
        struct tcphdr *th;
        int err;
@@ -328,11 +348,11 @@ static int execute_set_action(struct sk_buff *skb,
                break;
 
        case OVS_KEY_ATTR_TCP:
-               err = set_tcp_port(skb, nla_data(nested_attr));
+               err = set_tcp(skb, nla_data(nested_attr));
                break;
 
        case OVS_KEY_ATTR_UDP:
-               err = set_udp_port(skb, nla_data(nested_attr));
+               err = set_udp(skb, nla_data(nested_attr));
                break;
        }
 
index ce64c18..2c03050 100644 (file)
@@ -1521,6 +1521,9 @@ static struct vport *lookup_vport(struct ovs_header *ovs_header,
                vport = ovs_vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
                if (!vport)
                        return ERR_PTR(-ENODEV);
+               if (ovs_header->dp_ifindex &&
+                   ovs_header->dp_ifindex != get_dpifindex(vport->dp))
+                       return ERR_PTR(-ENODEV);
                return vport;
        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
index 60d4718..02a21ab 100644 (file)
@@ -469,11 +469,15 @@ enqueue:
        if (slot->qlen == 1) {          /* The flow is new */
                if (q->tail == NULL) {  /* It is the first flow */
                        slot->next = x;
-                       q->tail = slot;
                } else {
                        slot->next = q->tail->next;
                        q->tail->next = x;
                }
+               /* We put this flow at the end of our flow list.
+                * This might sound unfair for a new flow to wait after old ones,
+                * but we could end up servicing new flows only, and freeze old ones.
+                */
+               q->tail = slot;
                /* We could use a bigger initial quantum for new flows */
                slot->allot = q->scaled_quantum;
        }
index f286bb8..22c73b7 100644 (file)
@@ -2068,12 +2068,16 @@ static int alc_build_controls(struct hda_codec *codec)
  */
 
 static void alc_init_special_input_src(struct hda_codec *codec);
+static int alc269_fill_coef(struct hda_codec *codec);
 
 static int alc_init(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
        unsigned int i;
 
+       if (codec->vendor_id == 0x10ec0269)
+               alc269_fill_coef(codec);
+
        alc_fix_pll(codec);
        alc_auto_init_amp(codec, spec->init_amp);
 
@@ -4367,6 +4371,7 @@ enum {
        ALC882_FIXUP_PB_M5210,
        ALC882_FIXUP_ACER_ASPIRE_7736,
        ALC882_FIXUP_ASUS_W90V,
+       ALC889_FIXUP_CD,
        ALC889_FIXUP_VAIO_TT,
        ALC888_FIXUP_EEE1601,
        ALC882_FIXUP_EAPD,
@@ -4494,6 +4499,13 @@ static const struct alc_fixup alc882_fixups[] = {
                        { }
                }
        },
+       [ALC889_FIXUP_CD] = {
+               .type = ALC_FIXUP_PINS,
+               .v.pins = (const struct alc_pincfg[]) {
+                       { 0x1c, 0x993301f0 }, /* CD */
+                       { }
+               }
+       },
        [ALC889_FIXUP_VAIO_TT] = {
                .type = ALC_FIXUP_PINS,
                .v.pins = (const struct alc_pincfg[]) {
@@ -4650,6 +4662,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
 
        SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD),
        SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
+       SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3", ALC889_FIXUP_CD),
        SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
        SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
        SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
@@ -5467,8 +5480,12 @@ static const struct alc_model_fixup alc269_fixup_models[] = {
 
 static int alc269_fill_coef(struct hda_codec *codec)
 {
+       struct alc_spec *spec = codec->spec;
        int val;
 
+       if (spec->codec_variant != ALC269_TYPE_ALC269VB)
+               return 0;
+
        if ((alc_get_coef0(codec) & 0x00ff) < 0x015) {
                alc_write_coef_idx(codec, 0xf, 0x960b);
                alc_write_coef_idx(codec, 0xe, 0x8817);
index cc9f6c8..bc030a2 100644 (file)
@@ -6333,6 +6333,7 @@ static int __devinit snd_hdspm_create_hwdep(struct snd_card *card,
 
        hw->ops.open = snd_hdspm_hwdep_dummy_op;
        hw->ops.ioctl = snd_hdspm_hwdep_ioctl;
+       hw->ops.ioctl_compat = snd_hdspm_hwdep_ioctl;
        hw->ops.release = snd_hdspm_hwdep_dummy_op;
 
        return 0;
index c6012ff..d23b19a 100644 (file)
@@ -367,7 +367,7 @@ static struct snd_soc_dai_link neo1973_dai[] = {
        .platform_name = "samsung-audio",
        .cpu_dai_name = "s3c24xx-iis",
        .codec_dai_name = "wm8753-hifi",
-       .codec_name = "wm8753-codec.0-001a",
+       .codec_name = "wm8753.0-001a",
        .init = neo1973_wm8753_init,
        .ops = &neo1973_hifi_ops,
 },
@@ -376,7 +376,7 @@ static struct snd_soc_dai_link neo1973_dai[] = {
        .stream_name = "Voice",
        .cpu_dai_name = "dfbmcs320-pcm",
        .codec_dai_name = "wm8753-voice",
-       .codec_name = "wm8753-codec.0-001a",
+       .codec_name = "wm8753.0-001a",
        .ops = &neo1973_voice_ops,
 },
 };
index 4626a39..ca600e0 100644 (file)
@@ -1,3 +1,10 @@
+OUTPUT := ./
+ifeq ("$(origin O)", "command line")
+  ifneq ($(O),)
+       OUTPUT := $(O)/
+  endif
+endif
+
 MAN1_TXT= \
        $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
                $(wildcard perf-*.txt)) \
@@ -6,10 +13,11 @@ MAN5_TXT=
 MAN7_TXT=
 
 MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
-MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
-MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
+_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
+_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
 
-DOC_HTML=$(MAN_HTML)
+MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
+MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
 
 ARTICLES =
 # with their own formatting rules.
@@ -18,11 +26,17 @@ API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technica
 SP_ARTICLES += $(API_DOCS)
 SP_ARTICLES += technical/api-index
 
-DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
+_DOC_HTML = $(_MAN_HTML)
+_DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
+DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML))
 
-DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
-DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
-DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
+_DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
+_DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
+_DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
+
+DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1))
+DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5))
+DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7))
 
 # Make the path relative to DESTDIR, not prefix
 ifndef DESTDIR
@@ -150,9 +164,9 @@ man1: $(DOC_MAN1)
 man5: $(DOC_MAN5)
 man7: $(DOC_MAN7)
 
-info: perf.info perfman.info
+info: $(OUTPUT)perf.info $(OUTPUT)perfman.info
 
-pdf: user-manual.pdf
+pdf: $(OUTPUT)user-manual.pdf
 
 install: install-man
 
@@ -166,7 +180,7 @@ install-man: man
 
 install-info: info
        $(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
-       $(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir)
+       $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir)
        if test -r $(DESTDIR)$(infodir)/dir; then \
          $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
          $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
@@ -176,7 +190,7 @@ install-info: info
 
 install-pdf: pdf
        $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
-       $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir)
+       $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
 
 #install-html: html
 #      '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
@@ -189,14 +203,14 @@ install-pdf: pdf
 #
 # Determine "include::" file references in asciidoc files.
 #
-doc.dep : $(wildcard *.txt) build-docdep.perl
+$(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl
        $(QUIET_GEN)$(RM) $@+ $@ && \
        $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
        mv $@+ $@
 
--include doc.dep
+-include $(OUTPUT)doc.dep
 
-cmds_txt = cmds-ancillaryinterrogators.txt \
+_cmds_txt = cmds-ancillaryinterrogators.txt \
        cmds-ancillarymanipulators.txt \
        cmds-mainporcelain.txt \
        cmds-plumbinginterrogators.txt \
@@ -205,32 +219,36 @@ cmds_txt = cmds-ancillaryinterrogators.txt \
        cmds-synchelpers.txt \
        cmds-purehelpers.txt \
        cmds-foreignscminterface.txt
+cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt))
 
-$(cmds_txt): cmd-list.made
+$(cmds_txt): $(OUTPUT)cmd-list.made
 
-cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
+$(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
        $(QUIET_GEN)$(RM) $@ && \
        $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
        date >$@
 
 clean:
-       $(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7
-       $(RM) *.texi *.texi+ *.texi++ perf.info perfman.info
-       $(RM) howto-index.txt howto/*.html doc.dep
-       $(RM) technical/api-*.html technical/api-index.txt
-       $(RM) $(cmds_txt) *.made
-
-$(MAN_HTML): %.html : %.txt
+       $(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML))
+       $(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML))
+       $(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7)
+       $(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++
+       $(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info
+       $(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep
+       $(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt
+       $(RM) $(cmds_txt) $(OUTPUT)*.made
+
+$(MAN_HTML): $(OUTPUT)%.html : %.txt
        $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
        $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
                $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
        mv $@+ $@
 
-%.1 %.5 %.7 : %.xml
+$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
        $(QUIET_XMLTO)$(RM) $@ && \
-       xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+       xmlto -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
 
-%.xml : %.txt
+$(OUTPUT)%.xml : %.txt
        $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
        $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
                $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
@@ -239,25 +257,25 @@ $(MAN_HTML): %.html : %.txt
 XSLT = docbook.xsl
 XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
 
-user-manual.html: user-manual.xml
+$(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml
        $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
 
-perf.info: user-manual.texi
-       $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi
+$(OUTPUT)perf.info: $(OUTPUT)user-manual.texi
+       $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi
 
-user-manual.texi: user-manual.xml
+$(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml
        $(QUIET_DB2TEXI)$(RM) $@+ $@ && \
-       $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
+       $(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
        $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
        rm $@++ && \
        mv $@+ $@
 
-user-manual.pdf: user-manual.xml
+$(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml
        $(QUIET_DBLATEX)$(RM) $@+ $@ && \
        $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
        mv $@+ $@
 
-perfman.texi: $(MAN_XML) cat-texi.perl
+$(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl
        $(QUIET_DB2TEXI)$(RM) $@+ $@ && \
        ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
                --to-stdout $(xml) &&) true) > $@++ && \
@@ -265,7 +283,7 @@ perfman.texi: $(MAN_XML) cat-texi.perl
        rm $@++ && \
        mv $@+ $@
 
-perfman.info: perfman.texi
+$(OUTPUT)perfman.info: $(OUTPUT)perfman.texi
        $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
 
 $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
index d6b2a4f..c7f5f55 100644 (file)
@@ -8,7 +8,7 @@ perf-lock - Analyze lock events
 SYNOPSIS
 --------
 [verse]
-'perf lock' {record|report|trace}
+'perf lock' {record|report|script|info}
 
 DESCRIPTION
 -----------
@@ -20,10 +20,13 @@ and statistics with this 'perf lock' command.
   produces the file "perf.data" which contains tracing
   results of lock events.
 
-  'perf lock trace' shows raw lock events.
-
   'perf lock report' reports statistical data.
 
+  'perf lock script' shows raw lock events.
+
+  'perf lock info' shows metadata like threads or addresses
+  of lock instances.
+
 COMMON OPTIONS
 --------------
 
@@ -47,6 +50,17 @@ REPORT OPTIONS
         Sorting key. Possible values: acquired (default), contended,
         wait_total, wait_max, wait_min.
 
+INFO OPTIONS
+------------
+
+-t::
+--threads::
+       dump thread list in perf.data
+
+-m::
+--map::
+       dump map of lock instances (address:name table)
+
 SEE ALSO
 --------
 linkperf:perf[1]
index 2937f7e..a1386b2 100644 (file)
@@ -52,11 +52,15 @@ OPTIONS
 
 -p::
 --pid=::
-       Record events on existing process ID.
+       Record events on existing process ID (comma separated list).
 
 -t::
 --tid=::
-        Record events on existing thread ID.
+        Record events on existing thread ID (comma separated list).
+
+-u::
+--uid=::
+        Record events in threads owned by uid. Name or number.
 
 -r::
 --realtime=::
@@ -148,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha
 corresponding events, i.e., they always refer to events defined earlier on the command
 line.
 
+-b::
+--branch-any::
+Enable taken branch stack sampling. Any type of taken branch may be sampled.
+This is a shortcut for --branch-filter any. See --branch-filter for more information.
+
+-j::
+--branch-filter::
+Enable taken branch stack sampling. Each sample captures a series of consecutive
+taken branches. The number of branches captured with each sample depends on the
+underlying hardware, the type of branches of interest, and the executed code.
+It is possible to select the types of branches captured by enabling filters. The
+following filters are defined:
+
+        - any:  any type of branches
+        - any_call: any function call or system call
+        - any_ret: any function return or system call return
+        - ind_call: any indirect branch
+        - u:  only when the branch target is at the user level
+        - k: only when the branch target is in the kernel
+        - hv: only when the target is at the hypervisor level
+
++
+The option requires at least one branch type among any, any_call, any_ret, ind_call.
+The privilege levels may be omitted, in which case, the privilege levels of the associated
+event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
+levels are subject to permissions.  When sampling on multiple events, branch stack sampling
+is enabled for all the sampling events. The sampled branch type is the same for all events.
+The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
+Note that this feature may not be available on all processors.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
index 9b430e9..87feeee 100644 (file)
@@ -153,6 +153,16 @@ OPTIONS
        information which may be very large and thus may clutter the display.
        It currently includes: cpu and numa topology of the host system.
 
+-b::
+--branch-stack::
+       Use the addresses of sampled taken branches instead of the instruction
+       address to build the histograms. To generate meaningful output, the
+       perf.data file must have been obtained using perf record -b or
+       perf record --branch-filter xxx where xxx is a branch filter option.
+       perf report is able to auto-detect whether a perf.data file contains
+       branch stacks and it will automatically switch to the branch view mode,
+       unless --no-branch-stack is used.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-annotate[1]
index 2f6cef4..e9cbfcd 100644 (file)
@@ -115,7 +115,7 @@ OPTIONS
 -f::
 --fields::
         Comma separated list of fields to print. Options are:
-        comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr.
+        comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -f sw:comm,tid,time,ip,sym  and -f trace:time,cpu,trace
@@ -200,6 +200,9 @@ OPTIONS
        It currently includes: cpu and numa topology of the host system.
        It can only be used with the perf script report mode.
 
+--show-kernel-path::
+       Try to resolve the path of [kernel.kallsyms]
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
index 8966b9a..2fa173b 100644 (file)
@@ -35,11 +35,11 @@ OPTIONS
         child tasks do not inherit counters
 -p::
 --pid=<pid>::
-        stat events on existing process id
+        stat events on existing process id (comma separated list)
 
 -t::
 --tid=<tid>::
-        stat events on existing thread id
+        stat events on existing thread id (comma separated list)
 
 
 -a::
index b1a5bbb..4a5680c 100644 (file)
@@ -72,11 +72,15 @@ Default is to monitor all CPUS.
 
 -p <pid>::
 --pid=<pid>::
-       Profile events on existing Process ID.
+       Profile events on existing Process ID (comma separated list).
 
 -t <tid>::
 --tid=<tid>::
-        Profile events on existing thread ID.
+        Profile events on existing thread ID (comma separated list).
+
+-u::
+--uid=::
+        Record events in threads owned by uid. Name or number.
 
 -r <priority>::
 --realtime=<priority>::
index 1078c5f..5476bc0 100644 (file)
@@ -9,6 +9,7 @@ lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
 arch/*/lib/memcpy*.S
+arch/*/lib/memset*.S
 include/linux/poison.h
 include/linux/magic.h
 include/linux/hw_breakpoint.h
index 7c12650..74fd7f8 100644 (file)
@@ -15,6 +15,16 @@ endif
 
 # Define V to have a more verbose compile.
 #
+# Define O to save output files in a separate directory.
+#
+# Define ARCH as name of target architecture if you want cross-builds.
+#
+# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
+#
+# Define NO_LIBPERL to disable perl script extension.
+#
+# Define NO_LIBPYTHON to disable python script extension.
+#
 # Define PYTHON to point to the python binary if the default
 # `python' is not correct; for example: PYTHON=python2
 #
@@ -32,6 +42,10 @@ endif
 # Define NO_DWARF if you do not want debug-info analysis feature at all.
 #
 # Define WERROR=0 to disable treating any warnings as errors.
+#
+# Define NO_NEWT if you do not want TUI support.
+#
+# Define NO_DEMANGLE if you do not want C++ symbol demangling.
 
 $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
        @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
@@ -61,7 +75,7 @@ ifeq ($(ARCH),x86_64)
        ifeq (${IS_X86_64}, 1)
                RAW_ARCH := x86_64
                ARCH_CFLAGS := -DARCH_X86_64
-               ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
+               ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
        endif
 endif
 
@@ -183,7 +197,10 @@ SCRIPT_SH += perf-archive.sh
 grep-libs = $(filter -l%,$(1))
 strip-libs = $(filter-out -l%,$(1))
 
-$(OUTPUT)python/perf.so: $(PYRF_OBJS)
+PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
+
+$(OUTPUT)python/perf.so: $(PYRF_OBJS) $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
        $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
          --quiet build_ext; \
        mkdir -p $(OUTPUT)python && \
@@ -249,6 +266,8 @@ LIB_H += util/include/asm/uaccess.h
 LIB_H += util/include/dwarf-regs.h
 LIB_H += util/include/asm/dwarf2.h
 LIB_H += util/include/asm/cpufeature.h
+LIB_H += util/include/asm/unistd_32.h
+LIB_H += util/include/asm/unistd_64.h
 LIB_H += perf.h
 LIB_H += util/annotate.h
 LIB_H += util/cache.h
@@ -256,6 +275,7 @@ LIB_H += util/callchain.h
 LIB_H += util/build-id.h
 LIB_H += util/debug.h
 LIB_H += util/debugfs.h
+LIB_H += util/sysfs.h
 LIB_H += util/event.h
 LIB_H += util/evsel.h
 LIB_H += util/evlist.h
@@ -302,6 +322,7 @@ LIB_OBJS += $(OUTPUT)util/build-id.o
 LIB_OBJS += $(OUTPUT)util/config.o
 LIB_OBJS += $(OUTPUT)util/ctype.o
 LIB_OBJS += $(OUTPUT)util/debugfs.o
+LIB_OBJS += $(OUTPUT)util/sysfs.o
 LIB_OBJS += $(OUTPUT)util/environment.o
 LIB_OBJS += $(OUTPUT)util/event.o
 LIB_OBJS += $(OUTPUT)util/evlist.o
@@ -359,8 +380,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
 BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
 ifeq ($(RAW_ARCH),x86_64)
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
 endif
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
 BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
@@ -792,7 +815,6 @@ help:
        @echo '  quick-install-html     - install the html documentation quickly'
        @echo ''
        @echo 'Perf maintainer targets:'
-       @echo '  distclean              - alias to clean'
        @echo '  clean                  - clean all binary objects and build output'
 
 doc:
index eba80c2..2f7073d 100644 (file)
@@ -25,7 +25,7 @@ get_cpuid(char *buffer, size_t sz)
 
        pvr = mfspr(SPRN_PVR);
 
-       nb = snprintf(buffer, sz, "%lu,%lu$", PVR_VER(pvr), PVR_REV(pvr));
+       nb = scnprintf(buffer, sz, "%lu,%lu$", PVR_VER(pvr), PVR_REV(pvr));
 
        /* look for end marker to ensure the entire data fit */
        if (strchr(buffer, '$')) {
index f940060..146d12a 100644 (file)
@@ -48,7 +48,7 @@ get_cpuid(char *buffer, size_t sz)
                if (family >= 0x6)
                        model += ((a >> 16) & 0xf) << 4;
        }
-       nb = snprintf(buffer, sz, "%s,%u,%u,%u$", vendor, family, model, step);
+       nb = scnprintf(buffer, sz, "%s,%u,%u,%u$", vendor, family, model, step);
 
        /* look for end marker to ensure the entire data fit */
        if (strchr(buffer, '$')) {
index f7781c6..a09bece 100644 (file)
@@ -4,6 +4,7 @@
 extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
 extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
 extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
+extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
 
 #define BENCH_FORMAT_DEFAULT_STR       "default"
 #define BENCH_FORMAT_DEFAULT           0
index d588b87..d66ab79 100644 (file)
@@ -2,3 +2,11 @@
 MEMCPY_FN(__memcpy,
        "x86-64-unrolled",
        "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(memcpy_c,
+       "x86-64-movsq",
+       "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(memcpy_c_e,
+       "x86-64-movsb",
+       "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
index 185a96d..fcd9cf0 100644 (file)
@@ -1,4 +1,8 @@
-
+#define memcpy MEMCPY /* don't hide glibc's memcpy() */
+#define altinstr_replacement text
+#define globl p2align 4; .globl
+#define Lmemcpy_c globl memcpy_c; memcpy_c
+#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
 #include "../../../arch/x86/lib/memcpy_64.S"
 /*
  * We need to provide note.GNU-stack section, saying that we want
index db82021..7155722 100644 (file)
@@ -5,7 +5,6 @@
  *
  * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  */
-#include <ctype.h>
 
 #include "../perf.h"
 #include "../util/util.h"
@@ -24,6 +23,7 @@
 
 static const char      *length_str     = "1MB";
 static const char      *routine        = "default";
+static int             iterations      = 1;
 static bool            use_clock;
 static int             clock_fd;
 static bool            only_prefault;
@@ -35,6 +35,8 @@ static const struct option options[] = {
                    "available unit: B, MB, GB (upper and lower)"),
        OPT_STRING('r', "routine", &routine, "default",
                    "Specify routine to copy"),
+       OPT_INTEGER('i', "iterations", &iterations,
+                   "repeat memcpy() invocation this number of times"),
        OPT_BOOLEAN('c', "clock", &use_clock,
                    "Use CPU clock for measuring"),
        OPT_BOOLEAN('o', "only-prefault", &only_prefault,
@@ -121,6 +123,7 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
 {
        u64 clock_start = 0ULL, clock_end = 0ULL;
        void *src = NULL, *dst = NULL;
+       int i;
 
        alloc_mem(&src, &dst, len);
 
@@ -128,7 +131,8 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
                fn(dst, src, len);
 
        clock_start = get_clock();
-       fn(dst, src, len);
+       for (i = 0; i < iterations; ++i)
+               fn(dst, src, len);
        clock_end = get_clock();
 
        free(src);
@@ -140,6 +144,7 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
 {
        struct timeval tv_start, tv_end, tv_diff;
        void *src = NULL, *dst = NULL;
+       int i;
 
        alloc_mem(&src, &dst, len);
 
@@ -147,7 +152,8 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
                fn(dst, src, len);
 
        BUG_ON(gettimeofday(&tv_start, NULL));
-       fn(dst, src, len);
+       for (i = 0; i < iterations; ++i)
+               fn(dst, src, len);
        BUG_ON(gettimeofday(&tv_end, NULL));
 
        timersub(&tv_end, &tv_start, &tv_diff);
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
new file mode 100644 (file)
index 0000000..a040fa7
--- /dev/null
@@ -0,0 +1,12 @@
+
+#ifdef ARCH_X86_64
+
+#define MEMSET_FN(fn, name, desc)              \
+       extern void *fn(void *, int, size_t);
+
+#include "mem-memset-x86-64-asm-def.h"
+
+#undef MEMSET_FN
+
+#endif
+
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
new file mode 100644 (file)
index 0000000..a71dff9
--- /dev/null
@@ -0,0 +1,12 @@
+/* Routine list: the includer defines MEMSET_FN(fn, name, desc) before #include. */
+MEMSET_FN(__memset,
+       "x86-64-unrolled",
+       "unrolled memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(memset_c,
+       "x86-64-stosq",
+       "stosq-based memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(memset_c_e,
+       "x86-64-stosb",
+       "stosb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
new file mode 100644 (file)
index 0000000..9e5af89
--- /dev/null
@@ -0,0 +1,13 @@
+#define memset MEMSET /* don't hide glibc's memset() */
+#define altinstr_replacement text
+#define globl p2align 4; .globl
+#define Lmemset_c globl memset_c; memset_c
+#define Lmemset_c_e globl memset_c_e; memset_c_e
+#include "../../../arch/x86/lib/memset_64.S"
+
+/*
+ * We need to provide note.GNU-stack section, saying that we want
+ * NOT executable stack. Otherwise the final linking will assume that
+ * the ELF stack should not be restricted at all and set it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
new file mode 100644 (file)
index 0000000..e907918
--- /dev/null
@@ -0,0 +1,297 @@
+/*
+ * mem-memset.c
+ *
+ * memset: Simple memory set in various ways
+ *
+ * Trivial clone of mem-memcpy.c.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "mem-memset-arch.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char      *length_str     = "1MB";
+static const char      *routine        = "default";
+static int             iterations      = 1;
+static bool            use_clock;
+static int             clock_fd;
+static bool            only_prefault;
+static bool            no_prefault;
+
+static const struct option options[] = {      /* flags of 'perf bench mem memset' */
+       OPT_STRING('l', "length", &length_str, "1MB",
+                   "Specify length of memory to set. "
+                   "available unit: B, MB, GB (upper and lower)"),
+       OPT_STRING('r', "routine", &routine, "default",
+                   "Specify routine to set"),
+       OPT_INTEGER('i', "iterations", &iterations,
+                   "repeat memset() invocation this number of times"),
+       OPT_BOOLEAN('c', "clock", &use_clock,
+                   "Use CPU clock for measuring"),
+       OPT_BOOLEAN('o', "only-prefault", &only_prefault,
+                   "Show only the result with page faults before memset()"),
+       OPT_BOOLEAN('n', "no-prefault", &no_prefault,
+                   "Show only the result without page faults before memset()"),
+       OPT_END()       /* terminator entry */
+};
+
+typedef void *(*memset_t)(void *, int, size_t);
+
+struct routine {
+       const char *name;
+       const char *desc;
+       memset_t fn;
+};
+
+static const struct routine routines[] = {
+       { "default",
+         "Default memset() provided by glibc",
+         memset },
+#ifdef ARCH_X86_64
+
+#define MEMSET_FN(fn, name, desc) { name, desc, fn },
+#include "mem-memset-x86-64-asm-def.h"
+#undef MEMSET_FN
+
+#endif
+
+       { NULL,
+         NULL,
+         NULL   }
+};
+
+static const char * const bench_mem_memset_usage[] = {
+       "perf bench mem memset <options>",
+       NULL
+};
+
+static struct perf_event_attr clock_attr = {
+       .type           = PERF_TYPE_HARDWARE,
+       .config         = PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void)   /* opens the event described by clock_attr into clock_fd */
+{
+       clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+
+       if (clock_fd < 0 && errno == ENOSYS)    /* ENOSYS gets a readable message */
+               die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+       else
+               BUG_ON(clock_fd < 0);   /* any other open failure trips BUG_ON */
+}
+
+static u64 get_clock(void)     /* returns the current u64 value read from clock_fd */
+{
+       int ret;
+       u64 clk;
+
+       ret = read(clock_fd, &clk, sizeof(u64));
+       BUG_ON(ret != sizeof(u64));     /* a short or failed read is treated as fatal */
+
+       return clk;
+}
+
+static double timeval2double(struct timeval *ts)       /* struct timeval -> seconds */
+{
+       return (double)ts->tv_sec +
+               (double)ts->tv_usec / (double)1000000;  /* microseconds -> fractional seconds */
+}
+
+static void alloc_mem(void **dst, size_t length)       /* *dst = zalloc(length), or die() */
+{
+       *dst = zalloc(length);
+       if (!*dst)      /* test the allocation result, not the out-parameter itself */
+               die("memory allocation failed - maybe length is too large?\n");
+}
+
+static u64 do_memset_clock(memset_t fn, size_t len, bool prefault)     /* get_clock() delta around the fn() loop */
+{
+       u64 clock_start = 0ULL, clock_end = 0ULL;
+       void *dst = NULL;
+       int i;
+
+       alloc_mem(&dst, len);
+
+       if (prefault)
+               fn(dst, -1, len);       /* untimed pass over the buffer before measuring */
+
+       clock_start = get_clock();
+       for (i = 0; i < iterations; ++i)
+               fn(dst, i, len);        /* fill value is the loop index */
+       clock_end = get_clock();
+
+       free(dst);
+       return clock_end - clock_start; /* total over all iterations, not per call */
+}
+
+static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)   /* returns bytes per second */
+{
+       struct timeval tv_start, tv_end, tv_diff;
+       void *dst = NULL;
+       int i;
+
+       alloc_mem(&dst, len);
+
+       if (prefault)
+               fn(dst, -1, len);       /* untimed pass over the buffer before measuring */
+
+       BUG_ON(gettimeofday(&tv_start, NULL));
+       for (i = 0; i < iterations; ++i)
+               fn(dst, i, len);        /* fill value is the loop index */
+       BUG_ON(gettimeofday(&tv_end, NULL));
+
+       timersub(&tv_end, &tv_start, &tv_diff);
+
+       free(dst);
+       /* the timed loop wrote len bytes 'iterations' times, so scale the byte count */
+       return (double)len * iterations / timeval2double(&tv_diff);
+}
+
+#define pf (no_prefault ? 0 : 1)
+
+#define print_bps(x) do {                                      \
+               if (x < K)                                      \
+                       printf(" %14lf B/Sec", x);              \
+               else if (x < K * K)                             \
+                       printf(" %14lfd KB/Sec", x / K);        \
+               else if (x < K * K * K)                         \
+                       printf(" %14lf MB/Sec", x / K / K);     \
+               else                                            \
+                       printf(" %14lf GB/Sec", x / K / K / K); \
+       } while (0)
+
+int bench_mem_memset(int argc, const char **argv,      /* entry point of 'perf bench mem memset' */
+                    const char *prefix __used)
+{
+       int i;
+       size_t len;
+       double result_bps[2];   /* [0] without prefault, [1] with prefault */
+       u64 result_clock[2];    /* same indexing as result_bps */
+
+       argc = parse_options(argc, argv, options,
+                            bench_mem_memset_usage, 0);
+
+       if (use_clock)
+               init_clock();
+
+       len = (size_t)perf_atoll((char *)length_str);
+
+       result_clock[0] = result_clock[1] = 0ULL;
+       result_bps[0] = result_bps[1] = 0.0;
+
+       if ((s64)len <= 0) {
+               fprintf(stderr, "Invalid length:%s\n", length_str);
+               return 1;
+       }
+
+       /* same to without specifying either of prefault and no-prefault */
+       if (only_prefault && no_prefault)
+               only_prefault = no_prefault = false;
+
+       for (i = 0; routines[i].name; i++) {
+               if (!strcmp(routines[i].name, routine))
+                       break;
+       }
+       if (!routines[i].name) {        /* hit the NULL terminator: no such routine */
+               printf("Unknown routine:%s\n", routine);
+               printf("Available routines...\n");
+               for (i = 0; routines[i].name; i++) {
+                       printf("\t%s ... %s\n",
+                              routines[i].name, routines[i].desc);
+               }
+               return 1;
+       }
+
+       if (bench_format == BENCH_FORMAT_DEFAULT)
+               printf("# Setting %s Bytes ...\n\n", length_str);
+
+       if (!only_prefault && !no_prefault) {
+               /* show both of results */
+               if (use_clock) {
+                       result_clock[0] =
+                               do_memset_clock(routines[i].fn, len, false);
+                       result_clock[1] =
+                               do_memset_clock(routines[i].fn, len, true);
+               } else {
+                       result_bps[0] =
+                               do_memset_gettimeofday(routines[i].fn,
+                                               len, false);
+                       result_bps[1] =
+                               do_memset_gettimeofday(routines[i].fn,
+                                               len, true);
+               }
+       } else {
+               if (use_clock) {
+                       result_clock[pf] =
+                               do_memset_clock(routines[i].fn,
+                                               len, only_prefault);
+               } else {
+                       result_bps[pf] =
+                               do_memset_gettimeofday(routines[i].fn,
+                                               len, only_prefault);
+               }
+       }
+
+       switch (bench_format) {
+       case BENCH_FORMAT_DEFAULT:
+               if (!only_prefault && !no_prefault) {
+                       if (use_clock) {
+                               printf(" %14lf Clock/Byte\n",
+                                       (double)result_clock[0]
+                                       / ((double)len * iterations));  /* bytes written = len * iterations */
+                               printf(" %14lf Clock/Byte (with prefault)\n",
+                                       (double)result_clock[1]
+                                       / ((double)len * iterations));
+                       } else {
+                               print_bps(result_bps[0]);
+                               printf("\n");
+                               print_bps(result_bps[1]);
+                               printf(" (with prefault)\n");
+                       }
+               } else {
+                       if (use_clock) {
+                               printf(" %14lf Clock/Byte",
+                                       (double)result_clock[pf]
+                                       / ((double)len * iterations));
+                       } else
+                               print_bps(result_bps[pf]);
+
+                       printf("%s\n", only_prefault ? " (with prefault)" : "");
+               }
+               break;
+       case BENCH_FORMAT_SIMPLE:
+               if (!only_prefault && !no_prefault) {
+                       if (use_clock) {
+                               printf("%lf %lf\n",
+                                       (double)result_clock[0] / ((double)len * iterations),
+                                       (double)result_clock[1] / ((double)len * iterations));
+                       } else {
+                               printf("%lf %lf\n",
+                                       result_bps[0], result_bps[1]);
+                       }
+               } else {
+                       if (use_clock) {
+                               printf("%lf\n", (double)result_clock[pf]
+                                       / ((double)len * iterations));
+                       } else
+                               printf("%lf\n", result_bps[pf]);
+               }
+               break;
+       default:
+               /* reaching this means there's some disaster: */
+               die("unknown format: %d\n", bench_format);
+               break;
+       }
+
+       return 0;
+}
index fcb9626..b0e74ab 100644 (file)
@@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = {
        { "memcpy",
          "Simple memory copy in various ways",
          bench_mem_memcpy },
+       { "memset",
+         "Simple memory set in various ways",
+         bench_mem_memset },
        suite_all,
        { NULL,
          NULL,
index 2296c39..12c8148 100644 (file)
@@ -922,12 +922,12 @@ static const struct option info_options[] = {
        OPT_BOOLEAN('t', "threads", &info_threads,
                    "dump thread list in perf.data"),
        OPT_BOOLEAN('m', "map", &info_map,
-                   "map of lock instances (name:address table)"),
+                   "map of lock instances (address:name table)"),
        OPT_END()
 };
 
 static const char * const lock_usage[] = {
-       "perf lock [<options>] {record|trace|report}",
+       "perf lock [<options>] {record|report|script|info}",
        NULL
 };
 
index fb85661..4935c09 100644 (file)
@@ -58,7 +58,7 @@ static struct {
        struct perf_probe_event events[MAX_PROBES];
        struct strlist *dellist;
        struct line_range line_range;
-       const char *target_module;
+       const char *target;
        int max_probe_points;
        struct strfilter *filter;
 } params;
@@ -246,7 +246,7 @@ static const struct option options[] = {
                   "file", "vmlinux pathname"),
        OPT_STRING('s', "source", &symbol_conf.source_prefix,
                   "directory", "path to kernel source"),
-       OPT_STRING('m', "module", &params.target_module,
+       OPT_STRING('m', "module", &params.target,
                   "modname|path",
                   "target module name (for online) or path (for offline)"),
 #endif
@@ -333,7 +333,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
                if (!params.filter)
                        params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
                                                       NULL);
-               ret = show_available_funcs(params.target_module,
+               ret = show_available_funcs(params.target,
                                           params.filter);
                strfilter__delete(params.filter);
                if (ret < 0)
@@ -354,7 +354,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
                        usage_with_options(probe_usage, options);
                }
 
-               ret = show_line_range(&params.line_range, params.target_module);
+               ret = show_line_range(&params.line_range, params.target);
                if (ret < 0)
                        pr_err("  Error: Failed to show lines. (%d)\n", ret);
                return ret;
@@ -371,7 +371,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
 
                ret = show_available_vars(params.events, params.nevents,
                                          params.max_probe_points,
-                                         params.target_module,
+                                         params.target,
                                          params.filter,
                                          params.show_ext_vars);
                strfilter__delete(params.filter);
@@ -393,7 +393,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
        if (params.nevents) {
                ret = add_perf_probe_events(params.events, params.nevents,
                                            params.max_probe_points,
-                                           params.target_module,
+                                           params.target,
                                            params.force_add);
                if (ret < 0) {
                        pr_err("  Error: Failed to add events. (%d)\n", ret);
index 0abfb18..be4e1ee 100644 (file)
@@ -44,6 +44,7 @@ struct perf_record {
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
+       const char              *uid_str;
        int                     output;
        unsigned int            page_size;
        int                     realtime_prio;
@@ -204,8 +205,11 @@ static void perf_record__open(struct perf_record *rec)
 
                if (opts->group && pos != first)
                        group_fd = first->fd;
+fallback_missing_features:
+               if (opts->exclude_guest_missing)
+                       attr->exclude_guest = attr->exclude_host = 0;
 retry_sample_id:
-               attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
+               attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
 try_again:
                if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
                                     opts->group, group_fd) < 0) {
@@ -217,15 +221,23 @@ try_again:
                        } else if (err ==  ENODEV && opts->cpu_list) {
                                die("No such device - did you specify"
                                        " an out-of-range profile CPU?\n");
-                       } else if (err == EINVAL && opts->sample_id_all_avail) {
-                               /*
-                                * Old kernel, no attr->sample_id_type_all field
-                                */
-                               opts->sample_id_all_avail = false;
-                               if (!opts->sample_time && !opts->raw_samples && !time_needed)
-                                       attr->sample_type &= ~PERF_SAMPLE_TIME;
-
-                               goto retry_sample_id;
+                       } else if (err == EINVAL) {
+                               if (!opts->exclude_guest_missing &&
+                                   (attr->exclude_guest || attr->exclude_host)) {
+                                       pr_debug("Old kernel, cannot exclude "
+                                                "guest or host samples.\n");
+                                       opts->exclude_guest_missing = true;
+                                       goto fallback_missing_features;
+                               } else if (!opts->sample_id_all_missing) {
+                                       /*
+                                        * Old kernel, no attr->sample_id_type_all field
+                                        */
+                                       opts->sample_id_all_missing = true;
+                                       if (!opts->sample_time && !opts->raw_samples && !time_needed)
+                                               attr->sample_type &= ~PERF_SAMPLE_TIME;
+
+                                       goto retry_sample_id;
+                               }
                        }
 
                        /*
@@ -385,7 +397,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 {
        struct stat st;
        int flags;
-       int err, output;
+       int err, output, feat;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
@@ -452,8 +464,17 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 
        rec->session = session;
 
-       if (!rec->no_buildid)
-               perf_header__set_feat(&session->header, HEADER_BUILD_ID);
+       for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
+               perf_header__set_feat(&session->header, feat);
+
+       if (rec->no_buildid)
+               perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
+
+       if (!have_tracepoints(&evsel_list->entries))
+               perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
+
+       if (!rec->opts.branch_stack)
+               perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
 
        if (!rec->file_new) {
                err = perf_session__read_header(session, output);
@@ -461,22 +482,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
                        goto out_delete_session;
        }
 
-       if (have_tracepoints(&evsel_list->entries))
-               perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
-
-       perf_header__set_feat(&session->header, HEADER_HOSTNAME);
-       perf_header__set_feat(&session->header, HEADER_OSRELEASE);
-       perf_header__set_feat(&session->header, HEADER_ARCH);
-       perf_header__set_feat(&session->header, HEADER_CPUDESC);
-       perf_header__set_feat(&session->header, HEADER_NRCPUS);
-       perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
-       perf_header__set_feat(&session->header, HEADER_CMDLINE);
-       perf_header__set_feat(&session->header, HEADER_VERSION);
-       perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
-       perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
-       perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
-       perf_header__set_feat(&session->header, HEADER_CPUID);
-
        if (forks) {
                err = perf_evlist__prepare_workload(evsel_list, opts, argv);
                if (err < 0) {
@@ -503,9 +508,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
                        return err;
        }
 
-       if (!!rec->no_buildid
+       if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
-               pr_err("Couldn't generating buildids. "
+               pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                return -1;
        }
@@ -636,6 +641,90 @@ out_delete_session:
        return err;
 }
 
+#define BRANCH_OPT(n, m) \
+       { .name = n, .mode = (m) }
+
+#define BRANCH_END { .name = NULL }
+
+struct branch_mode {
+       const char *name;
+       int mode;
+};
+
+static const struct branch_mode branch_modes[] = {
+       BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
+       BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
+       BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
+       BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
+       BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
+       BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
+       BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+       BRANCH_END
+};
+
+static int
+parse_branch_stack(const struct option *opt, const char *str, int unset)       /* ORs branch_modes[] bits into *opt->value */
+{
+#define ONLY_PLM \
+       (PERF_SAMPLE_BRANCH_USER        |\
+        PERF_SAMPLE_BRANCH_KERNEL      |\
+        PERF_SAMPLE_BRANCH_HV)
+
+       uint64_t *mode = (uint64_t *)opt->value;
+       const struct branch_mode *br;
+       char *s, *os = NULL, *p;        /* os keeps the strdup() result for free() */
+       int ret = -1;
+
+       if (unset)
+               return 0;       /* negated option: the mask is left untouched */
+
+       /*
+        * cannot set it twice, -b + --branch-filter for instance
+        */
+       if (*mode)
+               return -1;
+
+       /* str may be NULL in case no arg is passed to -b */
+       if (str) {
+               /* because str is read-only */
+               s = os = strdup(str);
+               if (!s)
+                       return -1;
+
+               for (;;) {
+                       p = strchr(s, ',');
+                       if (p)
+                               *p = '\0';      /* cut the current comma-separated token */
+
+                       for (br = branch_modes; br->name; br++) {
+                               if (!strcasecmp(s, br->name))   /* case-insensitive name match */
+                                       break;
+                       }
+                       if (!br->name) {        /* reached the table terminator: unknown token */
+                               ui__warning("unknown branch filter %s,"
+                                           " check man page\n", s);
+                               goto error;
+                       }
+
+                       *mode |= br->mode;
+
+                       if (!p)
+                               break;  /* that was the last token */
+
+                       s = p + 1;
+               }
+       }
+       ret = 0;
+
+       /* default to any branch */
+       if ((*mode & ~ONLY_PLM) == 0) { /* NOTE(review): the assignment below also drops any u/k/hv bits just parsed - confirm intended */
+               *mode = PERF_SAMPLE_BRANCH_ANY;
+       }
+error:
+       free(os);       /* os is still NULL when str was NULL */
+       return ret;
+}
+
 static const char * const record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
@@ -654,13 +743,10 @@ static const char * const record_usage[] = {
  */
 static struct perf_record record = {
        .opts = {
-               .target_pid          = -1,
-               .target_tid          = -1,
                .mmap_pages          = UINT_MAX,
                .user_freq           = UINT_MAX,
                .user_interval       = ULLONG_MAX,
                .freq                = 1000,
-               .sample_id_all_avail = true,
        },
        .write_mode = WRITE_FORCE,
        .file_new   = true,
@@ -679,9 +765,9 @@ const struct option record_options[] = {
                     parse_events_option),
        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
                     "event filter", parse_filter),
-       OPT_INTEGER('p', "pid", &record.opts.target_pid,
+       OPT_STRING('p', "pid", &record.opts.target_pid, "pid",
                    "record events on existing process id"),
-       OPT_INTEGER('t', "tid", &record.opts.target_tid,
+       OPT_STRING('t', "tid", &record.opts.target_tid, "tid",
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &record.realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
@@ -727,6 +813,15 @@ const struct option record_options[] = {
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
+       OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
+
+       OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
+                    "branch any", "sample any taken branches",
+                    parse_branch_stack),
+
+       OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
+                    "branch filter mask", "branch stack filter modes",
+                    parse_branch_stack),
        OPT_END()
 };
 
@@ -747,8 +842,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
 
        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
-       if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
-               !rec->opts.system_wide && !rec->opts.cpu_list)
+       if (!argc && !rec->opts.target_pid && !rec->opts.target_tid &&
+               !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str)
                usage_with_options(record_usage, record_options);
 
        if (rec->force && rec->append_file) {
@@ -788,11 +883,17 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
                goto out_symbol_exit;
        }
 
-       if (rec->opts.target_pid != -1)
+       rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid,
+                                        rec->opts.target_pid);
+       if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1)
+               goto out_free_fd;
+
+       if (rec->opts.target_pid)
                rec->opts.target_tid = rec->opts.target_pid;
 
        if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
-                                    rec->opts.target_tid, rec->opts.cpu_list) < 0)
+                                    rec->opts.target_tid, rec->opts.uid,
+                                    rec->opts.cpu_list) < 0)
                usage_with_options(record_usage, record_options);
 
        list_for_each_entry(pos, &evsel_list->entries, node) {
index 25d34d4..8e91c6e 100644 (file)
@@ -53,6 +53,82 @@ struct perf_report {
        DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
+static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
+                                       struct addr_location *al,
+                                       struct perf_sample *sample,
+                                       struct perf_evsel *evsel,
+                                     struct machine *machine)
+{      /* adds one branch hist entry per record in sample->branch_stack; returns 0 or an error code */
+       struct perf_report *rep = container_of(tool, struct perf_report, tool);
+       struct symbol *parent = NULL;
+       int err = 0;
+       unsigned i;
+       struct hist_entry *he;
+       struct branch_info *bi, *bx;
+
+       if ((sort__has_parent || symbol_conf.use_callchain)
+           && sample->callchain) {
+               err = machine__resolve_callchain(machine, evsel, al->thread,
+                                                sample->callchain, &parent);
+               if (err)
+                       return err;
+       }
+
+       bi = machine__resolve_bstack(machine, al->thread,
+                                    sample->branch_stack);
+       if (!bi)        /* a NULL result is reported as -ENOMEM */
+               return -ENOMEM;
+
+       for (i = 0; i < sample->branch_stack->nr; i++) {
+               if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
+                       continue;       /* skip branches lacking a symbol on either end */
+               /*
+                * The report shows the percentage of total branches captured
+                * and not events sampled. Thus we use a pseudo period of 1.
+                */
+               he = __hists__add_branch_entry(&evsel->hists, al, parent,
+                               &bi[i], 1);     /* NOTE(review): bi is not freed here; presumably the entry keeps &bi[i] - confirm ownership */
+               if (he) {
+                       struct annotation *notes;
+                       err = -ENOMEM;  /* returned if symbol__alloc_hist() fails below */
+                       bx = he->branch_info;
+                       if (bx->from.sym && use_browser > 0) {  /* source side of the branch */
+                               notes = symbol__annotation(bx->from.sym);
+                               if (!notes->src
+                                   && symbol__alloc_hist(bx->from.sym) < 0)
+                                       goto out;
+
+                               err = symbol__inc_addr_samples(bx->from.sym,
+                                                              bx->from.map,
+                                                              evsel->idx,
+                                                              bx->from.al_addr);
+                               if (err)
+                                       goto out;
+                       }
+
+                       if (bx->to.sym && use_browser > 0) {    /* target side of the branch */
+                               notes = symbol__annotation(bx->to.sym);
+                               if (!notes->src
+                                   && symbol__alloc_hist(bx->to.sym) < 0)
+                                       goto out;
+
+                               err = symbol__inc_addr_samples(bx->to.sym,
+                                                              bx->to.map,
+                                                              evsel->idx,
+                                                              bx->to.al_addr);
+                               if (err)
+                                       goto out;
+                       }
+                       evsel->hists.stats.total_period += 1;   /* matches the pseudo period of 1 used above */
+                       hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+                       err = 0;        /* this branch was added successfully */
+               } else
+                       return -ENOMEM; /* no entry returned: treated as allocation failure */
+       }
+out:
+       return err;
+}
+
 static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
                                      struct addr_location *al,
                                      struct perf_sample *sample,
@@ -126,14 +202,21 @@ static int process_sample_event(struct perf_tool *tool,
        if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
                return 0;
 
-       if (al.map != NULL)
-               al.map->dso->hit = 1;
+       if (sort__branch_mode == 1) {
+               if (perf_report__add_branch_hist_entry(tool, &al, sample,
+                                                      evsel, machine)) {
+                       pr_debug("problem adding lbr entry, skipping event\n");
+                       return -1;
+               }
+       } else {
+               if (al.map != NULL)
+                       al.map->dso->hit = 1;
 
-       if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
-               pr_debug("problem incrementing symbol period, skipping event\n");
-               return -1;
+               if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
+                       pr_debug("problem incrementing symbol period, skipping event\n");
+                       return -1;
+               }
        }
-
        return 0;
 }
 
@@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
                        }
        }
 
+       if (sort__branch_mode == 1) {
+               if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
+                       fprintf(stderr, "selected -b but no branch data."
+                                       " Did you call perf record without"
+                                       " -b?\n");
+                       return -1;
+               }
+       }
+
        return 0;
 }
 
@@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep)
 {
        int ret = -EINVAL;
        u64 nr_samples;
-       struct perf_session *session;
+       struct perf_session *session = rep->session;
        struct perf_evsel *pos;
        struct map *kernel_map;
        struct kmap *kernel_kmap;
@@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep)
 
        signal(SIGINT, sig_handler);
 
-       session = perf_session__new(rep->input_name, O_RDONLY,
-                                   rep->force, false, &rep->tool);
-       if (session == NULL)
-               return -ENOMEM;
-
-       rep->session = session;
-
        if (rep->cpu_list) {
                ret = perf_session__cpu_bitmap(session, rep->cpu_list,
                                               rep->cpu_bitmap);
@@ -427,9 +512,19 @@ setup:
        return 0;
 }
 
+/*
+ * Option callback for -b/--branch-stack.  Stores 1 in sort__branch_mode
+ * when the option is given and 0 when it is negated (--no-branch-stack).
+ * The option descriptor and its string argument are not used.
+ */
+static int
+parse_branch_mode(const struct option *opt __used, const char *str __used, int unset)
+{
+       sort__branch_mode = unset ? 0 : 1;
+       return 0;
+}
+
 int cmd_report(int argc, const char **argv, const char *prefix __used)
 {
+       struct perf_session *session;
        struct stat st;
+       bool has_br_stack = false;
+       int ret = -1;
        char callchain_default_opt[] = "fractal,0.5,callee";
        const char * const report_usage[] = {
                "perf report [<options>]",
@@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
        OPT_BOOLEAN(0, "stdio", &report.use_stdio,
                    "Use the stdio interface"),
        OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
-                  "sort by key(s): pid, comm, dso, symbol, parent"),
+                  "sort by key(s): pid, comm, dso, symbol, parent, dso_to,"
+                  " dso_from, symbol_to, symbol_from, mispredict"),
        OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
                    "Show sample percentage for different cpu modes"),
        OPT_STRING('p', "parent", &parent_pattern, "regex",
@@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
                   "Specify disassembler style (e.g. -M intel for intel syntax)"),
        OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
                    "Show a column with the sum of periods"),
+       OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",
+                   "use branch records for histogram filling", parse_branch_mode),
        OPT_END()
        };
 
@@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
                else
                        report.input_name = "perf.data";
        }
+       session = perf_session__new(report.input_name, O_RDONLY,
+                                   report.force, false, &report.tool);
+       if (session == NULL)
+               return -ENOMEM;
 
-       if (strcmp(report.input_name, "-") != 0)
+       report.session = session;
+
+       has_br_stack = perf_header__has_feat(&session->header,
+                                            HEADER_BRANCH_STACK);
+
+       if (sort__branch_mode == -1 && has_br_stack)
+               sort__branch_mode = 1;
+
+       /* sort__branch_mode could be 0 if --no-branch-stack */
+       if (sort__branch_mode == 1) {
+               /*
+                * if no sort_order is provided, then specify
+                * branch-mode specific order
+                */
+               if (sort_order == default_sort_order)
+                       sort_order = "comm,dso_from,symbol_from,"
+                                    "dso_to,symbol_to";
+
+       }
+
+       if (strcmp(report.input_name, "-") != 0) {
                setup_browser(true);
-       else
+       } else {
                use_browser = 0;
+       }
 
        /*
         * Only in the newt browser we are doing integrated annotation,
@@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
        }
 
        if (symbol__init() < 0)
-               return -1;
+               goto error;
 
        setup_sorting(report_usage, options);
 
        if (parent_pattern != default_parent_pattern) {
                if (sort_dimension__add("parent") < 0)
-                       return -1;
+                       goto error;
 
                /*
                 * Only show the parent fields if we explicitly
@@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
        if (argc)
                usage_with_options(report_usage, options);
 
-       sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
        sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
-       sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
 
-       return __cmd_report(&report);
+       if (sort__branch_mode == 1) {
+               sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout);
+               sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout);
+               sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);
+               sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);
+       } else {
+               sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
+               sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
+       }
+
+       ret = __cmd_report(&report);
+error:
+       perf_session__delete(session);
+       return ret;
 }
index bb68ddf..d4ce733 100644 (file)
@@ -40,6 +40,7 @@ enum perf_output_field {
        PERF_OUTPUT_SYM             = 1U << 8,
        PERF_OUTPUT_DSO             = 1U << 9,
        PERF_OUTPUT_ADDR            = 1U << 10,
+       PERF_OUTPUT_SYMOFFSET       = 1U << 11,
 };
 
 struct output_option {
@@ -57,6 +58,7 @@ struct output_option {
        {.str = "sym",   .field = PERF_OUTPUT_SYM},
        {.str = "dso",   .field = PERF_OUTPUT_DSO},
        {.str = "addr",  .field = PERF_OUTPUT_ADDR},
+       {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
 };
 
 /* default set to maintain compatibility with current format */
@@ -193,6 +195,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
                       "to symbols.\n");
                return -EINVAL;
        }
+       if (PRINT_FIELD(SYMOFFSET) && !PRINT_FIELD(SYM)) {
+               pr_err("Display of offsets requested but symbol is not"
+                      "selected.\n");
+               return -EINVAL;
+       }
        if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
                pr_err("Display of DSO requested but neither sample IP nor "
                           "sample address\nis selected. Hence, no addresses to convert "
@@ -300,10 +307,17 @@ static void print_sample_start(struct perf_sample *sample,
                } else
                        evname = __event_name(attr->type, attr->config);
 
-               printf("%s: ", evname ? evname : "(unknown)");
+               printf("%s: ", evname ? evname : "[unknown]");
        }
 }
 
+/*
+ * Return true when @attr describes a hardware branch-instructions event
+ * that is sampled with a period of 1.
+ *
+ * PERF_COUNT_HW_BRANCH_INSTRUCTIONS is an enumerator, not a bit mask, so
+ * the config has to be compared for equality: a bitwise AND would also
+ * accept other hardware event ids that merely share a bit with it (for
+ * example PERF_COUNT_HW_BRANCH_MISSES).
+ */
+static bool is_bts_event(struct perf_event_attr *attr)
+{
+       return ((attr->type == PERF_TYPE_HARDWARE) &&
+               (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
+               (attr->sample_period == 1));
+}
+
 static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
 {
        if ((attr->type == PERF_TYPE_SOFTWARE) &&
@@ -312,6 +326,9 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
             (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)))
                return true;
 
+       if (is_bts_event(attr))
+               return true;
+
        return false;
 }
 
@@ -323,7 +340,6 @@ static void print_sample_addr(union perf_event *event,
 {
        struct addr_location al;
        u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-       const char *symname, *dsoname;
 
        printf("%16" PRIx64, sample->addr);
 
@@ -343,22 +359,46 @@ static void print_sample_addr(union perf_event *event,
                al.sym = map__find_symbol(al.map, al.addr, NULL);
 
        if (PRINT_FIELD(SYM)) {
-               if (al.sym && al.sym->name)
-                       symname = al.sym->name;
+               printf(" ");
+               if (PRINT_FIELD(SYMOFFSET))
+                       symbol__fprintf_symname_offs(al.sym, &al, stdout);
                else
-                       symname = "";
-
-               printf(" %16s", symname);
+                       symbol__fprintf_symname(al.sym, stdout);
        }
 
        if (PRINT_FIELD(DSO)) {
-               if (al.map && al.map->dso && al.map->dso->name)
-                       dsoname = al.map->dso->name;
-               else
-                       dsoname = "";
+               printf(" (");
+               map__fprintf_dsoname(al.map, stdout);
+               printf(")");
+       }
+}
 
-               printf(" (%s)", dsoname);
+/*
+ * Print one sample of an event for which is_bts_event() is true, in the
+ * form "<from> => <to>" followed by a newline.
+ *
+ * The "from" side is printed through perf_event__print_ip() when the ip
+ * field is selected; the "to" side is printed through print_sample_addr()
+ * when the addr field is selected.  The " => " separator and the trailing
+ * newline are printed regardless of which fields are selected.
+ */
+static void print_sample_bts(union perf_event *event,
+                            struct perf_sample *sample,
+                            struct perf_evsel *evsel,
+                            struct machine *machine,
+                            struct thread *thread)
+{
+       struct perf_event_attr *attr = &evsel->attr;
+
+       /* print branch_from information */
+       if (PRINT_FIELD(IP)) {
+               /* lead with a space, or with a newline when callchains are in use */
+               if (!symbol_conf.use_callchain)
+                       printf(" ");
+               else
+                       printf("\n");
+               perf_event__print_ip(event, sample, machine, evsel,
+                                    PRINT_FIELD(SYM), PRINT_FIELD(DSO),
+                                    PRINT_FIELD(SYMOFFSET));
         }
+
+       printf(" => ");
+
+       /* print branch_to information */
+       if (PRINT_FIELD(ADDR))
+               print_sample_addr(event, sample, machine, thread, attr);
+
+       printf("\n");
 }
 
 static void process_event(union perf_event *event __unused,
@@ -374,6 +414,11 @@ static void process_event(union perf_event *event __unused,
 
        print_sample_start(sample, thread, attr);
 
+       if (is_bts_event(attr)) {
+               print_sample_bts(event, sample, evsel, machine, thread);
+               return;
+       }
+
        if (PRINT_FIELD(TRACE))
                print_trace_event(sample->cpu, sample->raw_data,
                                  sample->raw_size);
@@ -387,7 +432,8 @@ static void process_event(union perf_event *event __unused,
                else
                        printf("\n");
                perf_event__print_ip(event, sample, machine, evsel,
-                                    PRINT_FIELD(SYM), PRINT_FIELD(DSO));
+                                    PRINT_FIELD(SYM), PRINT_FIELD(DSO),
+                                    PRINT_FIELD(SYMOFFSET));
        }
 
        printf("\n");
@@ -1097,7 +1143,10 @@ static const struct option options[] = {
        OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
                    "Look for files with symbols relative to this directory"),
        OPT_CALLBACK('f', "fields", NULL, "str",
-                    "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
+                    "comma separated output fields prepend with 'type:'. "
+                    "Valid types: hw,sw,trace,raw. "
+                    "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
+                    "addr,symoff",
                     parse_output_fields),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                     "system-wide collection from all CPUs"),
@@ -1106,6 +1155,9 @@ static const struct option options[] = {
                   "only display events for these comms"),
        OPT_BOOLEAN('I', "show-info", &show_full_info,
                    "display extended information from perf.data file"),
+       OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
+                   "Show the path of [kernel.kallsyms]"),
+
        OPT_END()
 };
 
index f5d2a63..ea40e4e 100644 (file)
@@ -182,8 +182,8 @@ static int                  run_count                       =  1;
 static bool                    no_inherit                      = false;
 static bool                    scale                           =  true;
 static bool                    no_aggr                         = false;
-static pid_t                   target_pid                      = -1;
-static pid_t                   target_tid                      = -1;
+static const char              *target_pid;
+static const char              *target_tid;
 static pid_t                   child_pid                       = -1;
 static bool                    null_run                        =  false;
 static int                     detailed_run                    =  0;
@@ -296,7 +296,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel,
        if (system_wide)
                return perf_evsel__open_per_cpu(evsel, evsel_list->cpus,
                                                group, group_fd);
-       if (target_pid == -1 && target_tid == -1) {
+       if (!target_pid && !target_tid) {
                attr->disabled = 1;
                attr->enable_on_exec = 1;
        }
@@ -446,7 +446,7 @@ static int run_perf_stat(int argc __used, const char **argv)
                        exit(-1);
                }
 
-               if (target_tid == -1 && target_pid == -1 && !system_wide)
+               if (!target_tid && !target_pid && !system_wide)
                        evsel_list->threads->map[0] = child_pid;
 
                /*
@@ -576,6 +576,8 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
        if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
                fprintf(output, " # %8.3f CPUs utilized          ",
                        avg / avg_stats(&walltime_nsecs_stats));
+       else
+               fprintf(output, "                                   ");
 }
 
 /* used for get_ratio_color() */
@@ -844,12 +846,18 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 
                fprintf(output, " # %8.3f GHz                    ", ratio);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
+               char unit = 'M';
+
                total = avg_stats(&runtime_nsecs_stats[cpu]);
 
                if (total)
                        ratio = 1000.0 * avg / total;
+               if (ratio < 0.001) {
+                       ratio *= 1000;
+                       unit = 'K';
+               }
 
-               fprintf(output, " # %8.3f M/sec                  ", ratio);
+               fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
        } else {
                fprintf(output, "                                   ");
        }
@@ -960,14 +968,14 @@ static void print_stat(int argc, const char **argv)
        if (!csv_output) {
                fprintf(output, "\n");
                fprintf(output, " Performance counter stats for ");
-               if(target_pid == -1 && target_tid == -1) {
+               if (!target_pid && !target_tid) {
                        fprintf(output, "\'%s", argv[0]);
                        for (i = 1; i < argc; i++)
                                fprintf(output, " %s", argv[i]);
-               } else if (target_pid != -1)
-                       fprintf(output, "process id \'%d", target_pid);
+               } else if (target_pid)
+                       fprintf(output, "process id \'%s", target_pid);
                else
-                       fprintf(output, "thread id \'%d", target_tid);
+                       fprintf(output, "thread id \'%s", target_tid);
 
                fprintf(output, "\'");
                if (run_count > 1)
@@ -1041,10 +1049,10 @@ static const struct option options[] = {
                     "event filter", parse_filter),
        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
                    "child tasks do not inherit counters"),
-       OPT_INTEGER('p', "pid", &target_pid,
-                   "stat events on existing process id"),
-       OPT_INTEGER('t', "tid", &target_tid,
-                   "stat events on existing thread id"),
+       OPT_STRING('p', "pid", &target_pid, "pid",
+                  "stat events on existing process id"),
+       OPT_STRING('t', "tid", &target_tid, "tid",
+                  "stat events on existing thread id"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                    "system-wide collection from all CPUs"),
        OPT_BOOLEAN('g', "group", &group,
@@ -1182,7 +1190,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
        } else if (big_num_opt == 0) /* User passed --no-big-num */
                big_num = false;
 
-       if (!argc && target_pid == -1 && target_tid == -1)
+       if (!argc && !target_pid && !target_tid)
                usage_with_options(stat_usage, options);
        if (run_count <= 0)
                usage_with_options(stat_usage, options);
@@ -1198,10 +1206,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
        if (add_default_attributes())
                goto out;
 
-       if (target_pid != -1)
+       if (target_pid)
                target_tid = target_pid;
 
-       evsel_list->threads = thread_map__new(target_pid, target_tid);
+       evsel_list->threads = thread_map__new_str(target_pid,
+                                                 target_tid, UINT_MAX);
        if (evsel_list->threads == NULL) {
                pr_err("Problems finding threads of monitor\n");
                usage_with_options(stat_usage, options);
index 3854e86..3e087ce 100644 (file)
@@ -15,6 +15,8 @@
 #include "util/thread_map.h"
 #include "../../include/linux/hw_breakpoint.h"
 
+#include <sys/mman.h>
+
 static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym)
 {
        bool *visited = symbol__priv(sym);
@@ -276,7 +278,7 @@ static int test__open_syscall_event(void)
                return -1;
        }
 
-       threads = thread_map__new(-1, getpid());
+       threads = thread_map__new(-1, getpid(), UINT_MAX);
        if (threads == NULL) {
                pr_debug("thread_map__new\n");
                return -1;
@@ -342,7 +344,7 @@ static int test__open_syscall_event_on_all_cpus(void)
                return -1;
        }
 
-       threads = thread_map__new(-1, getpid());
+       threads = thread_map__new(-1, getpid(), UINT_MAX);
        if (threads == NULL) {
                pr_debug("thread_map__new\n");
                return -1;
@@ -490,7 +492,7 @@ static int test__basic_mmap(void)
                expected_nr_events[i] = random() % 257;
        }
 
-       threads = thread_map__new(-1, getpid());
+       threads = thread_map__new(-1, getpid(), UINT_MAX);
        if (threads == NULL) {
                pr_debug("thread_map__new\n");
                return -1;
@@ -1008,12 +1010,9 @@ realloc:
 static int test__PERF_RECORD(void)
 {
        struct perf_record_opts opts = {
-               .target_pid = -1,
-               .target_tid = -1,
                .no_delay   = true,
                .freq       = 10,
                .mmap_pages = 256,
-               .sample_id_all_avail = true,
        };
        cpu_set_t *cpu_mask = NULL;
        size_t cpu_mask_size = 0;
@@ -1054,7 +1053,7 @@ static int test__PERF_RECORD(void)
         * we're monitoring, the one forked there.
         */
        err = perf_evlist__create_maps(evlist, opts.target_pid,
-                                      opts.target_tid, opts.cpu_list);
+                                      opts.target_tid, UINT_MAX, opts.cpu_list);
        if (err < 0) {
                pr_debug("Not enough memory to create thread/cpu maps\n");
                goto out_delete_evlist;
@@ -1296,6 +1295,173 @@ out:
        return (err < 0 || errs > 0) ? -1 : 0;
 }
 
+
+#if defined(__x86_64__) || defined(__i386__)
+
+#define barrier() asm volatile("" ::: "memory")
+
+/*
+ * Execute the x86 RDPMC instruction with @counter passed in ECX and return
+ * the result as one 64-bit value assembled from the low half in EAX and the
+ * high half in EDX.
+ */
+static u64 rdpmc(unsigned int counter)
+{
+       unsigned int low, high;
+
+       asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+       return low | ((u64)high) << 32;
+}
+
+/*
+ * Execute the x86 RDTSC instruction and return the result as one 64-bit
+ * value assembled from the low half in EAX and the high half in EDX.
+ */
+static u64 rdtsc(void)
+{
+       unsigned int low, high;
+
+       asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+       return low | ((u64)high) << 32;
+}
+
+/*
+ * Read the current value of a counter from user space through the
+ * perf_event_mmap_page mapped at @addr.
+ *
+ * The page fields are sampled inside a retry loop: pc->lock is read before
+ * and after the sampling, and the whole read is repeated if it changed in
+ * between.  When pc->index is non-zero, counter (index - 1) is read with
+ * rdpmc() and added to pc->offset.
+ *
+ * If time_enabled and time_running differ, a time delta is derived from the
+ * TSC value and the page's time_mult/time_shift/time_offset fields.  It is
+ * added to enabled (and to running when pc->index is non-zero), and the
+ * count is then scaled by enabled / running.
+ *
+ * Returns the (possibly scaled) counter value.
+ */
+static u64 mmap_read_self(void *addr)
+{
+       struct perf_event_mmap_page *pc = addr;
+       u32 seq, idx, time_mult = 0, time_shift = 0;
+       u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
+
+       do {
+               seq = pc->lock;
+               barrier();
+
+               enabled = pc->time_enabled;
+               running = pc->time_running;
+
+               /* the time conversion values are only needed for scaling */
+               if (enabled != running) {
+                       cyc = rdtsc();
+                       time_mult = pc->time_mult;
+                       time_shift = pc->time_shift;
+                       time_offset = pc->time_offset;
+               }
+
+               idx = pc->index;
+               count = pc->offset;
+               if (idx)
+                       count += rdpmc(idx - 1);
+
+               barrier();
+       } while (pc->lock != seq);
+
+       if (enabled != running) {
+               u64 quot, rem;
+
+               quot = (cyc >> time_shift);
+               /*
+                * Build the mask in 64 bits: "1 << time_shift" is an int
+                * shift and is wrong for shift counts of 31 and above.
+                */
+               rem = cyc & (((u64)1 << time_shift) - 1);
+               delta = time_offset + quot * time_mult +
+                       ((rem * time_mult) >> time_shift);
+
+               enabled += delta;
+               if (idx)
+                       running += delta;
+
+               /* leave the count unscaled rather than divide by zero */
+               if (running) {
+                       quot = count / running;
+                       rem = count % running;
+                       count = quot * enabled + (rem * enabled) / running;
+               }
+       }
+
+       return count;
+}
+
+/*
+ * If the RDPMC instruction faults then signal this back to the test parent
+ * task: the handler ends the process with exit(-1), which gives the parent
+ * a non-zero wait status.  None of the three handler arguments is used.
+ */
+static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used)
+{
+       exit(-1);
+}
+
+/*
+ * Body of the x86 rdpmc test.
+ *
+ * Installs segfault_handler() for SIGSEGV, opens a user-space-only hardware
+ * instructions counter on the calling task, maps its first page read-only
+ * and then, six times, measures a busy loop whose iteration count grows
+ * tenfold each round by reading the counter through mmap_read_self() before
+ * and after the loop.  Each delta is printed to stderr.
+ *
+ * Calls die() if the event cannot be opened or mapped.
+ * Returns 0 if the deltas sum to a non-zero value, -1 otherwise.
+ */
+static int __test__rdpmc(void)
+{
+       long page_size = sysconf(_SC_PAGE_SIZE);
+       volatile int tmp = 0;
+       u64 i, loops = 1000;
+       int n;
+       int fd;
+       void *addr;
+       struct perf_event_attr attr = {
+               .type = PERF_TYPE_HARDWARE,
+               .config = PERF_COUNT_HW_INSTRUCTIONS,
+               .exclude_kernel = 1,
+       };
+       u64 delta_sum = 0;
+       /*
+        * Initialize every field: sa_flags must not be left as stack
+        * garbage, and SA_SIGINFO is what selects the sa_sigaction handler.
+        */
+       struct sigaction sa = {
+               .sa_sigaction = segfault_handler,
+               .sa_flags = SA_SIGINFO,
+       };
+
+       sigfillset(&sa.sa_mask);
+       sigaction(SIGSEGV, &sa, NULL);
+
+       fprintf(stderr, "\n\n");
+
+       fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+       if (fd < 0) {
+               die("Error: sys_perf_event_open() syscall returned "
+                   "with %d (%s)\n", fd, strerror(errno));
+       }
+
+       addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
+       if (addr == MAP_FAILED) {
+               die("Error: mmap() syscall returned "
+                   "with (%s)\n", strerror(errno));
+       }
+
+       for (n = 0; n < 6; n++) {
+               u64 stamp, now, delta;
+
+               stamp = mmap_read_self(addr);
+
+               /* tmp is volatile so the loop is not optimized away */
+               for (i = 0; i < loops; i++)
+                       tmp++;
+
+               now = mmap_read_self(addr);
+               loops *= 10;
+
+               delta = now - stamp;
+               fprintf(stderr, "%14d: %14llu\n", n, (unsigned long long)delta);
+
+               delta_sum += delta;
+       }
+
+       munmap(addr, page_size);
+       close(fd);
+
+       fprintf(stderr, "   ");
+
+       if (!delta_sum)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Run __test__rdpmc() in a forked child and report its outcome.
+ *
+ * The child exits with the value returned by __test__rdpmc().  The parent
+ * waits for it and returns 0 only if waitpid() succeeded and the wait
+ * status is zero; a failed fork(), a failed waitpid() or any non-zero
+ * status yields -1.
+ */
+static int test__rdpmc(void)
+{
+       int child_status = 0;
+       int child = fork();
+
+       if (child < 0)
+               return -1;
+
+       if (child == 0)
+               exit(__test__rdpmc());
+
+       if (waitpid(child, &child_status, 0) < 0)
+               return -1;
+
+       return child_status ? -1 : 0;
+}
+
+#endif
+
 static struct test {
        const char *desc;
        int (*func)(void);
@@ -1320,6 +1486,12 @@ static struct test {
                .desc = "parse events tests",
                .func = test__parse_events,
        },
+#if defined(__x86_64__) || defined(__i386__)
+       {
+               .desc = "x86 rdpmc test",
+               .func = test__rdpmc,
+       },
+#endif
        {
                .desc = "Validate PERF_RECORD_* events & perf_sample fields",
                .func = test__PERF_RECORD,
@@ -1412,7 +1584,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
        if (symbol__init() < 0)
                return -1;
 
-       setup_pager();
-
        return __cmd_test(argc, argv);
 }
index dd162aa..e3c63ae 100644 (file)
@@ -64,7 +64,6 @@
 #include <linux/unistd.h>
 #include <linux/types.h>
 
-
 void get_term_dimensions(struct winsize *ws)
 {
        char *s = getenv("LINES");
@@ -544,10 +543,20 @@ static void perf_top__sort_new_samples(void *arg)
 
 static void *display_thread_tui(void *arg)
 {
+       struct perf_evsel *pos;
        struct perf_top *top = arg;
        const char *help = "For a higher level overview, try: perf top --sort comm,dso";
 
        perf_top__sort_new_samples(top);
+
+       /*
+        * Initialize the uid_filter_str, in the future the TUI will allow
+        * Zooming in/out UIDs. For now just use whatever the user passed
+        * via --uid.
+        */
+       list_for_each_entry(pos, &top->evlist->entries, node)
+               pos->hists.uid_filter_str = top->uid_str;
+
        perf_evlist__tui_browse_hists(top->evlist, help,
                                      perf_top__sort_new_samples,
                                      top, top->delay_secs);
@@ -668,6 +677,12 @@ static void perf_event__process_sample(struct perf_tool *tool,
                return;
        }
 
+       if (!machine) {
+               pr_err("%u unprocessable samples recorded.",
+                      top->session->hists.stats.nr_unprocessable_samples++);
+               return;
+       }
+
        if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
                top->exact_samples++;
 
@@ -857,8 +872,11 @@ static void perf_top__start_counters(struct perf_top *top)
                attr->mmap = 1;
                attr->comm = 1;
                attr->inherit = top->inherit;
+fallback_missing_features:
+               if (top->exclude_guest_missing)
+                       attr->exclude_guest = attr->exclude_host = 0;
 retry_sample_id:
-               attr->sample_id_all = top->sample_id_all_avail ? 1 : 0;
+               attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
 try_again:
                if (perf_evsel__open(counter, top->evlist->cpus,
                                     top->evlist->threads, top->group,
@@ -868,12 +886,20 @@ try_again:
                        if (err == EPERM || err == EACCES) {
                                ui__error_paranoid();
                                goto out_err;
-                       } else if (err == EINVAL && top->sample_id_all_avail) {
-                               /*
-                                * Old kernel, no attr->sample_id_type_all field
-                                */
-                               top->sample_id_all_avail = false;
-                               goto retry_sample_id;
+                       } else if (err == EINVAL) {
+                               if (!top->exclude_guest_missing &&
+                                   (attr->exclude_guest || attr->exclude_host)) {
+                                       pr_debug("Old kernel, cannot exclude "
+                                                "guest or host samples.\n");
+                                       top->exclude_guest_missing = true;
+                                       goto fallback_missing_features;
+                               } else if (!top->sample_id_all_missing) {
+                                       /*
+                                        * Old kernel, no attr->sample_id_type_all field
+                                        */
+                                       top->sample_id_all_missing = true;
+                                       goto retry_sample_id;
+                               }
                        }
                        /*
                         * If it's cycles then fall back to hrtimer
@@ -956,7 +982,7 @@ static int __cmd_top(struct perf_top *top)
        if (ret)
                goto out_delete;
 
-       if (top->target_tid != -1)
+       if (top->target_tid || top->uid != UINT_MAX)
                perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
                                                  perf_event__process,
                                                  &top->session->host_machine);
@@ -1094,10 +1120,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
        struct perf_top top = {
                .count_filter        = 5,
                .delay_secs          = 2,
-               .target_pid          = -1,
-               .target_tid          = -1,
+               .uid                 = UINT_MAX,
                .freq                = 1000, /* 1 KHz */
-               .sample_id_all_avail = true,
                .mmap_pages          = 128,
                .sym_pcnt_filter     = 5,
        };
@@ -1108,9 +1132,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                     parse_events_option),
        OPT_INTEGER('c', "count", &top.default_interval,
                    "event period to sample"),
-       OPT_INTEGER('p', "pid", &top.target_pid,
+       OPT_STRING('p', "pid", &top.target_pid, "pid",
                    "profile events on existing process id"),
-       OPT_INTEGER('t', "tid", &top.target_tid,
+       OPT_STRING('t', "tid", &top.target_tid, "tid",
                    "profile events on existing thread id"),
        OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
                            "system-wide collection from all CPUs"),
@@ -1169,6 +1193,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                    "Display raw encoding of assembly instructions (default)"),
        OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
                   "Specify disassembler style (e.g. -M intel for intel syntax)"),
+       OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"),
        OPT_END()
        };
 
@@ -1194,18 +1219,22 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
 
        setup_browser(false);
 
+       top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid);
+       if (top.uid_str != NULL && top.uid == UINT_MAX - 1)
+               goto out_delete_evlist;
+
        /* CPU and PID are mutually exclusive */
-       if (top.target_tid > 0 && top.cpu_list) {
+       if (top.target_tid && top.cpu_list) {
                printf("WARNING: PID switch overriding CPU\n");
                sleep(1);
                top.cpu_list = NULL;
        }
 
-       if (top.target_pid != -1)
+       if (top.target_pid)
                top.target_tid = top.target_pid;
 
        if (perf_evlist__create_maps(top.evlist, top.target_pid,
-                                    top.target_tid, top.cpu_list) < 0)
+                                    top.target_tid, top.uid, top.cpu_list) < 0)
                usage_with_options(top_usage, options);
 
        if (!top.evlist->nr_entries &&
@@ -1269,6 +1298,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
 
        status = __cmd_top(&top);
 
+out_delete_evlist:
        perf_evlist__delete(top.evlist);
 
        return status;
index 64f8bee..89e3355 100644 (file)
@@ -10,6 +10,9 @@ void get_term_dimensions(struct winsize *ws);
 #define rmb()          asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
 #define cpu_relax()    asm volatile("rep; nop" ::: "memory");
 #define CPUINFO_PROC   "model name"
+#ifndef __NR_perf_event_open
+# define __NR_perf_event_open 336
+#endif
 #endif
 
 #if defined(__x86_64__)
@@ -17,6 +20,9 @@ void get_term_dimensions(struct winsize *ws);
 #define rmb()          asm volatile("lfence" ::: "memory")
 #define cpu_relax()    asm volatile("rep; nop" ::: "memory");
 #define CPUINFO_PROC   "model name"
+#ifndef __NR_perf_event_open
+# define __NR_perf_event_open 298
+#endif
 #endif
 
 #ifdef __powerpc__
@@ -167,7 +173,6 @@ sys_perf_event_open(struct perf_event_attr *attr,
                      pid_t pid, int cpu, int group_fd,
                      unsigned long flags)
 {
-       attr->size = sizeof(*attr);
        return syscall(__NR_perf_event_open, attr, pid, cpu,
                       group_fd, flags);
 }
@@ -180,14 +185,32 @@ struct ip_callchain {
        u64 ips[0];
 };
 
+struct branch_flags { /* flag bits for one branch record, packed into a single u64 */
+       u64 mispred:1; /* presumably: the branch was mispredicted -- TODO confirm against the kernel ABI */
+       u64 predicted:1; /* presumably: the branch was predicted correctly -- TODO confirm */
+       u64 reserved:62; /* remaining bits of the 64-bit word */
+};
+
+struct branch_entry { /* one record: two u64 values followed by one u64 of flags */
+       u64                             from; /* presumably the branch source address -- TODO confirm */
+       u64                             to; /* presumably the branch target address -- TODO confirm */
+       struct branch_flags flags;
+};
+
+struct branch_stack { /* a count followed by that many branch_entry records */
+       u64                             nr; /* number of entries[] that follow */
+       struct branch_entry     entries[0]; /* zero-length trailing array, sized by nr */
+};
+
 extern bool perf_host, perf_guest;
 extern const char perf_version_string[];
 
 void pthread__unblock_sigwinch(void);
 
 struct perf_record_opts {
-       pid_t        target_pid;
-       pid_t        target_tid;
+       const char   *target_pid;
+       const char   *target_tid;
+       uid_t        uid;
        bool         call_graph;
        bool         group;
        bool         inherit_stat;
@@ -198,12 +221,14 @@ struct perf_record_opts {
        bool         raw_samples;
        bool         sample_address;
        bool         sample_time;
-       bool         sample_id_all_avail;
+       bool         sample_id_all_missing;
+       bool         exclude_guest_missing;
        bool         system_wide;
        bool         period;
        unsigned int freq;
        unsigned int mmap_pages;
        unsigned int user_freq;
+       int          branch_stack;
        u64          default_interval;
        u64          user_interval;
        const char   *cpu_list;
index df638c4..b11cca5 100755 (executable)
@@ -19,7 +19,7 @@ def main():
        cpus = perf.cpu_map()
        threads = perf.thread_map()
        evsel = perf.evsel(task = 1, comm = 1, mmap = 0,
-                          wakeup_events = 1, sample_period = 1,
+                          wakeup_events = 1, watermark = 1,
                           sample_id_all = 1,
                           sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID)
        evsel.open(cpus = cpus, threads = threads);
index 011ed26..e5a462f 100644 (file)
@@ -315,7 +315,7 @@ fallback:
                       "Please use:\n\n"
                       "  perf buildid-cache -av vmlinux\n\n"
                       "or:\n\n"
-                      "  --vmlinux vmlinux",
+                      "  --vmlinux vmlinux\n",
                       sym->name, build_id_msg ?: "");
                goto out_free_filename;
        }
index 5e230ac..0a1adc1 100644 (file)
@@ -19,3 +19,13 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
 
        return w;
 }
+
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+                const unsigned long *bitmap2, int bits)
+{ /* dst = bitmap1 | bitmap2, computed one unsigned long at a time */
+       int k;
+       int nr = BITS_TO_LONGS(bits); /* number of words to process; BITS_TO_LONGS is defined outside this hunk */
+
+       for (k = 0; k < nr; k++)
+               dst[k] = bitmap1[k] | bitmap2[k]; /* whole words are read and written */
+}
index 521c38a..11e46da 100644 (file)
@@ -1,3 +1,4 @@
+#include <linux/kernel.h>
 #include "cache.h"
 #include "color.h"
 
@@ -182,12 +183,12 @@ static int __color_vsnprintf(char *bf, size_t size, const char *color,
        }
 
        if (perf_use_color_default && *color)
-               r += snprintf(bf, size, "%s", color);
-       r += vsnprintf(bf + r, size - r, fmt, args);
+               r += scnprintf(bf, size, "%s", color);
+       r += vscnprintf(bf + r, size - r, fmt, args);
        if (perf_use_color_default && *color)
-               r += snprintf(bf + r, size - r, "%s", PERF_COLOR_RESET);
+               r += scnprintf(bf + r, size - r, "%s", PERF_COLOR_RESET);
        if (trail)
-               r += snprintf(bf + r, size - r, "%s", trail);
+               r += scnprintf(bf + r, size - r, "%s", trail);
        return r;
 }
 
index 6893eec..adc72f0 100644 (file)
@@ -166,6 +166,17 @@ out:
        return cpus;
 }
 
+size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp)
+{ /* print "<nr> cpu(s): a, b, c\n" to fp; returns the sum of the fprintf() return values */
+       int i;
+       size_t printed = fprintf(fp, "%d cpu%s: ",
+                                map->nr, map->nr > 1 ? "s" : ""); /* plural suffix only when nr > 1 */
+       for (i = 0; i < map->nr; ++i)
+               printed += fprintf(fp, "%s%d", i ? ", " : "", map->map[i]); /* separator before every entry but the first */
+
+       return printed + fprintf(fp, "\n");
+}
+
 struct cpu_map *cpu_map__dummy_new(void)
 {
        struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
index 072c0a3..c415185 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __PERF_CPUMAP_H
 #define __PERF_CPUMAP_H
 
+#include <stdio.h>
+
 struct cpu_map {
        int nr;
        int map[];
@@ -10,4 +12,6 @@ struct cpu_map *cpu_map__new(const char *cpu_list);
 struct cpu_map *cpu_map__dummy_new(void);
 void cpu_map__delete(struct cpu_map *map);
 
+size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
+
 #endif /* __PERF_CPUMAP_H */
index 3507362..aada3ac 100644 (file)
@@ -3,7 +3,7 @@
  *
  * No surprises, and works with signed and unsigned chars.
  */
-#include "cache.h"
+#include "util.h"
 
 enum {
        S = GIT_SPACE,
index ffc35e7..dd8b193 100644 (file)
@@ -15,32 +15,6 @@ static const char *debugfs_known_mountpoints[] = {
        0,
 };
 
-/* use this to force a umount */
-void debugfs_force_cleanup(void)
-{
-       debugfs_find_mountpoint();
-       debugfs_premounted = 0;
-       debugfs_umount();
-}
-
-/* construct a full path to a debugfs element */
-int debugfs_make_path(const char *element, char *buffer, int size)
-{
-       int len;
-
-       if (strlen(debugfs_mountpoint) == 0) {
-               buffer[0] = '\0';
-               return -1;
-       }
-
-       len = strlen(debugfs_mountpoint) + strlen(element) + 1;
-       if (len >= size)
-               return len+1;
-
-       snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element);
-       return 0;
-}
-
 static int debugfs_found;
 
 /* find the path to the mounted debugfs */
@@ -97,17 +71,6 @@ int debugfs_valid_mountpoint(const char *debugfs)
        return 0;
 }
 
-
-int debugfs_valid_entry(const char *path)
-{
-       struct stat st;
-
-       if (stat(path, &st))
-               return -errno;
-
-       return 0;
-}
-
 static void debugfs_set_tracing_events_path(const char *mountpoint)
 {
        snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s",
@@ -149,107 +112,3 @@ void debugfs_set_path(const char *mountpoint)
        snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint);
        debugfs_set_tracing_events_path(mountpoint);
 }
-
-/* umount the debugfs */
-
-int debugfs_umount(void)
-{
-       char umountcmd[128];
-       int ret;
-
-       /* if it was already mounted, leave it */
-       if (debugfs_premounted)
-               return 0;
-
-       /* make sure it's a valid mount point */
-       ret = debugfs_valid_mountpoint(debugfs_mountpoint);
-       if (ret)
-               return ret;
-
-       snprintf(umountcmd, sizeof(umountcmd),
-                "/bin/umount %s", debugfs_mountpoint);
-       return system(umountcmd);
-}
-
-int debugfs_write(const char *entry, const char *value)
-{
-       char path[PATH_MAX + 1];
-       int ret, count;
-       int fd;
-
-       /* construct the path */
-       snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
-
-       /* verify that it exists */
-       ret = debugfs_valid_entry(path);
-       if (ret)
-               return ret;
-
-       /* get how many chars we're going to write */
-       count = strlen(value);
-
-       /* open the debugfs entry */
-       fd = open(path, O_RDWR);
-       if (fd < 0)
-               return -errno;
-
-       while (count > 0) {
-               /* write it */
-               ret = write(fd, value, count);
-               if (ret <= 0) {
-                       if (ret == EAGAIN)
-                               continue;
-                       close(fd);
-                       return -errno;
-               }
-               count -= ret;
-       }
-
-       /* close it */
-       close(fd);
-
-       /* return success */
-       return 0;
-}
-
-/*
- * read a debugfs entry
- * returns the number of chars read or a negative errno
- */
-int debugfs_read(const char *entry, char *buffer, size_t size)
-{
-       char path[PATH_MAX + 1];
-       int ret;
-       int fd;
-
-       /* construct the path */
-       snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
-
-       /* verify that it exists */
-       ret = debugfs_valid_entry(path);
-       if (ret)
-               return ret;
-
-       /* open the debugfs entry */
-       fd = open(path, O_RDONLY);
-       if (fd < 0)
-               return -errno;
-
-       do {
-               /* read it */
-               ret = read(fd, buffer, size);
-               if (ret == 0) {
-                       close(fd);
-                       return EOF;
-               }
-       } while (ret < 0 && errno == EAGAIN);
-
-       /* close it */
-       close(fd);
-
-       /* make *sure* there's a null character at the end */
-       buffer[ret] = '\0';
-
-       /* return the number of chars read */
-       return ret;
-}
index 4a878f7..68f3e87 100644 (file)
@@ -3,14 +3,8 @@
 
 const char *debugfs_find_mountpoint(void);
 int debugfs_valid_mountpoint(const char *debugfs);
-int debugfs_valid_entry(const char *path);
 char *debugfs_mount(const char *mountpoint);
-int debugfs_umount(void);
 void debugfs_set_path(const char *mountpoint);
-int debugfs_write(const char *entry, const char *value);
-int debugfs_read(const char *entry, char *buffer, size_t size);
-void debugfs_force_cleanup(void);
-int debugfs_make_path(const char *element, char *buffer, int size);
 
 extern char debugfs_mountpoint[];
 extern char tracing_events_path[];
index cbdeaad..1b19728 100644 (file)
@@ -81,6 +81,7 @@ struct perf_sample {
        u32 raw_size;
        void *raw_data;
        struct ip_callchain *callchain;
+       struct branch_stack *branch_stack;
 };
 
 #define BUILD_ID_SIZE 20
index ea32a06..159263d 100644 (file)
@@ -97,9 +97,9 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
        ++evlist->nr_entries;
 }
 
-static void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
-                                         struct list_head *list,
-                                         int nr_entries)
+void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
+                                  struct list_head *list,
+                                  int nr_entries)
 {
        list_splice_tail(list, &evlist->entries);
        evlist->nr_entries += nr_entries;
@@ -597,15 +597,15 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
        return perf_evlist__mmap_per_cpu(evlist, prot, mask);
 }
 
-int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
-                            pid_t target_tid, const char *cpu_list)
+int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid,
+                            const char *target_tid, uid_t uid, const char *cpu_list)
 {
-       evlist->threads = thread_map__new(target_pid, target_tid);
+       evlist->threads = thread_map__new_str(target_pid, target_tid, uid);
 
        if (evlist->threads == NULL)
                return -1;
 
-       if (cpu_list == NULL && target_tid != -1)
+       if (uid != UINT_MAX || (cpu_list == NULL && target_tid))
                evlist->cpus = cpu_map__dummy_new();
        else
                evlist->cpus = cpu_map__new(cpu_list);
@@ -765,6 +765,7 @@ out_err:
        list_for_each_entry_reverse(evsel, &evlist->entries, node)
                perf_evsel__close(evsel, ncpus, nthreads);
 
+       errno = -err;
        return err;
 }
 
@@ -824,7 +825,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist,
                exit(-1);
        }
 
-       if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1)
+       if (!opts->system_wide && !opts->target_tid && !opts->target_pid)
                evlist->threads->map[0] = evlist->workload.pid;
 
        close(child_ready_pipe[1]);
index 8922aee..21f1c9e 100644 (file)
@@ -106,8 +106,8 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
        evlist->threads = threads;
 }
 
-int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
-                            pid_t target_tid, const char *cpu_list);
+int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid,
+                            const char *tid, uid_t uid, const char *cpu_list);
 void perf_evlist__delete_maps(struct perf_evlist *evlist);
 int perf_evlist__set_filters(struct perf_evlist *evlist);
 
@@ -117,4 +117,9 @@ u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist);
 
 bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist);
 bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist);
+
+void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
+                                  struct list_head *list,
+                                  int nr_entries);
+
 #endif /* __PERF_EVLIST_H */
index 7132ee8..f421f7c 100644 (file)
@@ -68,7 +68,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
        struct perf_event_attr *attr = &evsel->attr;
        int track = !evsel->idx; /* only the first counter needs these */
 
-       attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
+       attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
        attr->inherit       = !opts->no_inherit;
        attr->read_format   = PERF_FORMAT_TOTAL_TIME_ENABLED |
                              PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -111,7 +111,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
        if (opts->period)
                attr->sample_type       |= PERF_SAMPLE_PERIOD;
 
-       if (opts->sample_id_all_avail &&
+       if (!opts->sample_id_all_missing &&
            (opts->sample_time || opts->system_wide ||
             !opts->no_inherit || opts->cpu_list))
                attr->sample_type       |= PERF_SAMPLE_TIME;
@@ -126,11 +126,15 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
                attr->watermark = 0;
                attr->wakeup_events = 1;
        }
+       if (opts->branch_stack) {
+               attr->sample_type       |= PERF_SAMPLE_BRANCH_STACK;
+               attr->branch_sample_type = opts->branch_stack;
+       }
 
        attr->mmap = track;
        attr->comm = track;
 
-       if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) {
+       if (!opts->target_pid && !opts->target_tid && !opts->system_wide) {
                attr->disabled = 1;
                attr->enable_on_exec = 1;
        }
@@ -536,7 +540,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
        }
 
        if (type & PERF_SAMPLE_READ) {
-               fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
+               fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
                return -1;
        }
 
@@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
                data->raw_data = (void *) pdata;
        }
 
+       if (type & PERF_SAMPLE_BRANCH_STACK) {
+               u64 sz;
+
+               data->branch_stack = (struct branch_stack *)array;
+               array++; /* nr */
+
+               sz = data->branch_stack->nr * sizeof(struct branch_entry);
+               sz /= sizeof(u64);
+               array += sz;
+       }
        return 0;
 }
 
index ecd7f4d..fcd9cf3 100644 (file)
@@ -63,9 +63,20 @@ char *perf_header__find_event(u64 id)
        return NULL;
 }
 
-static const char *__perf_magic = "PERFFILE";
+/*
+ * magic2 = "PERFILE2"
+ * must be a numerical value to let the endianness
+ * determine the memory layout. That way we are able
+ * to detect endianness when reading the perf.data file
+ * back.
+ *
+ * we check for legacy (PERFFILE) format.
+ */
+static const char *__perf_magic1 = "PERFFILE";
+static const u64 __perf_magic2    = 0x32454c4946524550ULL;
+static const u64 __perf_magic2_sw = 0x50455246494c4532ULL;
 
-#define PERF_MAGIC     (*(u64 *)__perf_magic)
+#define PERF_MAGIC     __perf_magic2
 
 struct perf_file_attr {
        struct perf_event_attr  attr;
@@ -280,7 +291,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
        if (realname == NULL || filename == NULL || linkname == NULL)
                goto out_free;
 
-       len = snprintf(filename, size, "%s%s%s",
+       len = scnprintf(filename, size, "%s%s%s",
                       debugdir, is_kallsyms ? "/" : "", realname);
        if (mkdir_p(filename, 0755))
                goto out_free;
@@ -295,7 +306,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
                        goto out_free;
        }
 
-       len = snprintf(linkname, size, "%s/.build-id/%.2s",
+       len = scnprintf(linkname, size, "%s/.build-id/%.2s",
                       debugdir, sbuild_id);
 
        if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
@@ -1012,6 +1023,12 @@ write_it:
        return do_write_string(fd, buffer);
 }
 
+static int write_branch_stack(int fd __used, struct perf_header *h __used,
+                      struct perf_evlist *evlist __used)
+{ /* feature writer that emits no payload: every argument is unused */
+       return 0; /* always reports success */
+}
+
 static void print_hostname(struct perf_header *ph, int fd, FILE *fp)
 {
        char *str = do_read_string(fd, ph);
@@ -1133,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
        uint64_t id;
        void *buf = NULL;
        char *str;
-       u32 nre, sz, nr, i, j, msz;
-       int ret;
+       u32 nre, sz, nr, i, j;
+       ssize_t ret;
+       size_t msz;
 
        /* number of events */
        ret = read(fd, &nre, sizeof(nre));
@@ -1151,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
        if (ph->needs_swap)
                sz = bswap_32(sz);
 
-       /*
-        * ensure it is at least to our ABI rev
-        */
-       if (sz < (u32)sizeof(attr))
-               goto error;
-
        memset(&attr, 0, sizeof(attr));
 
-       /* read entire region to sync up to next field */
+       /* buffer to hold the on-file attr struct */
        buf = malloc(sz);
        if (!buf)
                goto error;
 
        msz = sizeof(attr);
-       if (sz < msz)
+       if (sz < (ssize_t)msz)
                msz = sz;
 
        for (i = 0 ; i < nre; i++) {
 
+               /*
+                * must read entire on-file attr struct to
+                * sync up with layout.
+                */
                ret = read(fd, buf, sz);
                if (ret != (ssize_t)sz)
                        goto error;
@@ -1305,25 +1321,204 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp)
        free(str);
 }
 
+static void print_branch_stack(struct perf_header *ph __used, int fd __used,
+                              FILE *fp)
+{ /* feature printer: emits one fixed comment line to fp; ph and fd are unused */
+       fprintf(fp, "# contains samples with branch stack\n");
+}
+
+static int __event_process_build_id(struct build_id_event *bev,
+                                   char *filename,
+                                   struct perf_session *session)
+{ /* attach bev's build id to the dso named filename; 0 on success, -1 if no machine is found or the cpumode is unrecognised */
+       int err = -1;
+       struct list_head *head;
+       struct machine *machine;
+       u16 misc;
+       struct dso *dso;
+       enum dso_kernel_type dso_type;
+
+       machine = perf_session__findnew_machine(session, bev->pid); /* machine is looked up by the pid stored in the event */
+       if (!machine)
+               goto out;
+
+       misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; /* keep only the cpumode bits */
+
+       switch (misc) { /* cpumode selects both the dso type and which machine list is searched */
+       case PERF_RECORD_MISC_KERNEL:
+               dso_type = DSO_TYPE_KERNEL;
+               head = &machine->kernel_dsos;
+               break;
+       case PERF_RECORD_MISC_GUEST_KERNEL:
+               dso_type = DSO_TYPE_GUEST_KERNEL;
+               head = &machine->kernel_dsos;
+               break;
+       case PERF_RECORD_MISC_USER:
+       case PERF_RECORD_MISC_GUEST_USER:
+               dso_type = DSO_TYPE_USER;
+               head = &machine->user_dsos;
+               break;
+       default:
+               goto out; /* unknown cpumode: fail with -1 */
+       }
+
+       dso = __dsos__findnew(head, filename);
+       if (dso != NULL) { /* a NULL dso is skipped silently and still counts as success below */
+               char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+               dso__set_build_id(dso, &bev->build_id);
+
+               if (filename[0] == '[') /* only names starting with '[' get dso->kernel set */
+                       dso->kernel = dso_type;
+
+               build_id__sprintf(dso->build_id, sizeof(dso->build_id),
+                                 sbuild_id); /* formatted into sbuild_id for the debug message only */
+               pr_debug("build id event received for %s: %s\n",
+                        dso->long_name, sbuild_id);
+       }
+
+       err = 0;
+out:
+       return err;
+}
+
+static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
+                                                int input, u64 offset, u64 size)
+{ /* read a build-id table stored in the old record layout (no pid field); 0 on success, -1 on a short read or an invalid record size */
+       struct perf_session *session = container_of(header, struct perf_session, header);
+       struct {
+               struct perf_event_header   header;
+               u8                         build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
+               char                       filename[0];
+       } old_bev; /* on-file record layout without the pid member */
+       struct build_id_event bev;
+       char filename[PATH_MAX];
+       u64 limit = offset + size;
+
+       while (offset < limit) {
+               ssize_t len;
+
+               if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
+                       return -1;
+
+               if (header->needs_swap)
+                       perf_event_header__bswap(&old_bev.header);
+
+               len = old_bev.header.size - sizeof(old_bev); /* header.size comes from the file: validate before use */
+               if (len <= 0 || (size_t)len > sizeof(filename) || /* undersized or oversized record would overrun filename[] */
+                   read(input, filename, len) != len)
+                       return -1;
+               bev.header = old_bev.header;
+
+               /*
+                * As the pid is the missing value, we need to fill
+                * it properly. The header.misc value gives us a nice hint.
+                */
+               bev.pid = HOST_KERNEL_ID;
+               if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
+                   bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
+                       bev.pid = DEFAULT_GUEST_KERNEL_ID;
+
+               memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
+               __event_process_build_id(&bev, filename, session); /* result is intentionally not checked */
+
+               offset += bev.header.size; /* always advances: size > sizeof(old_bev) was checked above */
+       }
+
+       return 0;
+}
+
+static int perf_header__read_build_ids(struct perf_header *header,
+                                      int input, u64 offset, u64 size)
+{ /* read the build-id table of `size` bytes from input; 0 on success, -1 on a short read, bad record size or failed seek */
+       struct perf_session *session = container_of(header, struct perf_session, header);
+       struct build_id_event bev;
+       char filename[PATH_MAX];
+       u64 limit = offset + size, orig_offset = offset;
+       int err = -1;
+
+       while (offset < limit) {
+               ssize_t len;
+
+               if (read(input, &bev, sizeof(bev)) != sizeof(bev))
+                       goto out;
+
+               if (header->needs_swap)
+                       perf_event_header__bswap(&bev.header);
+               len = bev.header.size - sizeof(bev); /* header.size comes from the file: validate before use */
+               if (len <= 0 || (size_t)len > sizeof(filename) || /* undersized or oversized record would overrun filename[] */
+                   read(input, filename, len) != len)
+                       goto out;
+               /*
+                * The a1645ce1 changeset:
+                *
+                * "perf: 'perf kvm' tool for monitoring guest performance from host"
+                *
+                * Added a field to struct build_id_event that broke the file
+                * format.
+                *
+                * Since the kernel build-id is the first entry, process the
+                * table using the old format if the well known
+                * '[kernel.kallsyms]' string for the kernel build-id has the
+                * first 4 characters chopped off (where the pid_t sits).
+                */
+               if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
+                       if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1) /* rewind to the start of the table */
+                               return -1;
+                       return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
+               }
+
+               __event_process_build_id(&bev, filename, session); /* result is intentionally not checked */
+
+               offset += bev.header.size; /* always advances: size > sizeof(bev) was checked above */
+       }
+       err = 0;
+out:
+       return err;
+}
+
+static int process_trace_info(struct perf_file_section *section __unused,
+                             struct perf_header *ph __unused,
+                             int feat __unused, int fd)
+{ /* feature handler: hands fd to trace_report(); only fd is used */
+       trace_report(fd, false); /* any result of trace_report() is not checked here */
+       return 0; /* always reports success */
+}
+
+static int process_build_id(struct perf_file_section *section,
+                           struct perf_header *ph,
+                           int feat __unused, int fd)
+{ /* feature handler: reads the build-id table described by section from fd */
+       if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
+               pr_debug("Failed to read buildids, continuing...\n"); /* failure is logged, not propagated */
+       return 0; /* always reports success */
+}
+
 struct feature_ops {
        int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
        void (*print)(struct perf_header *h, int fd, FILE *fp);
+       int (*process)(struct perf_file_section *section,
+                      struct perf_header *h, int feat, int fd);
        const char *name;
        bool full_only;
 };
 
 #define FEAT_OPA(n, func) \
        [n] = { .name = #n, .write = write_##func, .print = print_##func }
+#define FEAT_OPP(n, func) \
+       [n] = { .name = #n, .write = write_##func, .print = print_##func, \
+               .process = process_##func }
 #define FEAT_OPF(n, func) \
-       [n] = { .name = #n, .write = write_##func, .print = print_##func, .full_only = true }
+       [n] = { .name = #n, .write = write_##func, .print = print_##func, \
+               .full_only = true }
 
 /* feature_ops not implemented: */
 #define print_trace_info               NULL
 #define print_build_id                 NULL
 
 static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
-       FEAT_OPA(HEADER_TRACE_INFO,     trace_info),
-       FEAT_OPA(HEADER_BUILD_ID,       build_id),
+       FEAT_OPP(HEADER_TRACE_INFO,     trace_info),
+       FEAT_OPP(HEADER_BUILD_ID,       build_id),
        FEAT_OPA(HEADER_HOSTNAME,       hostname),
        FEAT_OPA(HEADER_OSRELEASE,      osrelease),
        FEAT_OPA(HEADER_VERSION,        version),
@@ -1336,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
        FEAT_OPA(HEADER_CMDLINE,        cmdline),
        FEAT_OPF(HEADER_CPU_TOPOLOGY,   cpu_topology),
        FEAT_OPF(HEADER_NUMA_TOPOLOGY,  numa_topology),
+       FEAT_OPA(HEADER_BRANCH_STACK,   branch_stack),
 };
 
 struct header_print_data {
@@ -1620,24 +1816,128 @@ out_free:
        return err;
 }
 
+static const int attr_file_abi_sizes[] = {
+       [0] = PERF_ATTR_SIZE_VER0,
+       [1] = PERF_ATTR_SIZE_VER1,
+       0,
+};
+
+/*
+ * In the legacy file format, the magic number is not used to encode endianness.
+ * hdr_sz was used to encode endianness. But given that hdr_sz can vary based
+ * on ABI revisions, we need to try all combinations for all endianness to
+ * detect the endianness.
+ */
+static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph)
+{ /* match hdr_sz against each known attr size, natively then byte-swapped; 0 on a match, -1 if none */
+       uint64_t ref_size, attr_size;
+       int i;
+
+       for (i = 0 ; attr_file_abi_sizes[i]; i++) { /* the table is terminated by a 0 entry */
+               ref_size = attr_file_abi_sizes[i]
+                        + sizeof(struct perf_file_section); /* expected value: attr size for this ABI plus one file section */
+               if (hdr_sz != ref_size) {
+                       attr_size = bswap_64(hdr_sz);
+                       if (attr_size != ref_size)
+                               continue;
+
+                       ph->needs_swap = true; /* only the byte-swapped value matched */
+               }
+               pr_debug("ABI%d perf.data file detected, need_swap=%d\n",
+                        i,
+                        ph->needs_swap);
+               return 0;
+       }
+       /* could not determine endianness */
+       return -1;
+}
+
+#define PERF_PIPE_HDR_VER0     16
+
+static const size_t attr_pipe_abi_sizes[] = {
+       [0] = PERF_PIPE_HDR_VER0,
+       0,
+};
+
+/*
+ * In the legacy pipe format, there is an implicit assumption that endianness
+ * between host recording the samples, and host parsing the samples is the
+ * same. This is not always the case given that the pipe output may always be
+ * redirected into a file and analyzed on a different machine with possibly a
+ * different endianness and perf_event ABI revisions in the perf tool itself.
+ */
+static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
+{ /* match hdr_sz against each known pipe header size, natively then byte-swapped; 0 on a match, -1 if none */
+       u64 attr_size;
+       int i;
+
+       for (i = 0 ; attr_pipe_abi_sizes[i]; i++) { /* the table is terminated by a 0 entry */
+               if (hdr_sz != attr_pipe_abi_sizes[i]) {
+                       attr_size = bswap_64(hdr_sz);
+                       if (attr_size != attr_pipe_abi_sizes[i]) /* compare the swapped value with the table entry, not with hdr_sz itself */
+                               continue;
+
+                       ph->needs_swap = true; /* only the byte-swapped value matched */
+               }
+               pr_debug("Pipe ABI%d perf.data file detected\n", i);
+               return 0;
+       }
+       return -1;
+}
+
+static int check_magic_endian(u64 magic, uint64_t hdr_sz,
+                             bool is_pipe, struct perf_header *ph)
+{ /* classify the file by its magic and set ph->needs_swap when it is byte-swapped; 0 if recognised, -1 otherwise */
+       int ret;
+
+       /* check for legacy format */
+       ret = memcmp(&magic, __perf_magic1, sizeof(magic)); /* byte-wise compare against the legacy magic string */
+       if (ret == 0) {
+               pr_debug("legacy perf.data format\n");
+               if (is_pipe)
+                       return try_all_pipe_abis(hdr_sz, ph); /* legacy files: endianness is inferred from hdr_sz */
+
+               return try_all_file_abis(hdr_sz, ph);
+       }
+       /*
+        * the new magic number serves two purposes:
+        * - unique number to identify actual perf.data files
+        * - encode endianness of file
+        */
+
+       /* check magic number with one endianness */
+       if (magic == __perf_magic2)
+               return 0; /* ph->needs_swap is left untouched on this path */
+
+       /* check magic number with opposite endianness */
+       if (magic != __perf_magic2_sw)
+               return -1; /* neither legacy nor either byte order of the new magic */
+
+       ph->needs_swap = true;
+
+       return 0;
+}
+
 int perf_file_header__read(struct perf_file_header *header,
                           struct perf_header *ph, int fd)
 {
+       int ret;
+
        lseek(fd, 0, SEEK_SET);
 
-       if (readn(fd, header, sizeof(*header)) <= 0 ||
-           memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
+       ret = readn(fd, header, sizeof(*header));
+       if (ret <= 0)
                return -1;
 
-       if (header->attr_size != sizeof(struct perf_file_attr)) {
-               u64 attr_size = bswap_64(header->attr_size);
-
-               if (attr_size != sizeof(struct perf_file_attr))
-                       return -1;
+       if (check_magic_endian(header->magic,
+                              header->attr_size, false, ph) < 0) {
+               pr_debug("magic/endian check failed\n");
+               return -1;
+       }
 
+       if (ph->needs_swap) {
                mem_bswap_64(header, offsetof(struct perf_file_header,
-                                           adds_features));
-               ph->needs_swap = true;
+                            adds_features));
        }
 
        if (header->size != sizeof(*header)) {
@@ -1689,156 +1989,6 @@ int perf_file_header__read(struct perf_file_header *header,
        return 0;
 }
 
-static int __event_process_build_id(struct build_id_event *bev,
-                                   char *filename,
-                                   struct perf_session *session)
-{
-       int err = -1;
-       struct list_head *head;
-       struct machine *machine;
-       u16 misc;
-       struct dso *dso;
-       enum dso_kernel_type dso_type;
-
-       machine = perf_session__findnew_machine(session, bev->pid);
-       if (!machine)
-               goto out;
-
-       misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
-       switch (misc) {
-       case PERF_RECORD_MISC_KERNEL:
-               dso_type = DSO_TYPE_KERNEL;
-               head = &machine->kernel_dsos;
-               break;
-       case PERF_RECORD_MISC_GUEST_KERNEL:
-               dso_type = DSO_TYPE_GUEST_KERNEL;
-               head = &machine->kernel_dsos;
-               break;
-       case PERF_RECORD_MISC_USER:
-       case PERF_RECORD_MISC_GUEST_USER:
-               dso_type = DSO_TYPE_USER;
-               head = &machine->user_dsos;
-               break;
-       default:
-               goto out;
-       }
-
-       dso = __dsos__findnew(head, filename);
-       if (dso != NULL) {
-               char sbuild_id[BUILD_ID_SIZE * 2 + 1];
-
-               dso__set_build_id(dso, &bev->build_id);
-
-               if (filename[0] == '[')
-                       dso->kernel = dso_type;
-
-               build_id__sprintf(dso->build_id, sizeof(dso->build_id),
-                                 sbuild_id);
-               pr_debug("build id event received for %s: %s\n",
-                        dso->long_name, sbuild_id);
-       }
-
-       err = 0;
-out:
-       return err;
-}
-
-static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
-                                                int input, u64 offset, u64 size)
-{
-       struct perf_session *session = container_of(header, struct perf_session, header);
-       struct {
-               struct perf_event_header   header;
-               u8                         build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
-               char                       filename[0];
-       } old_bev;
-       struct build_id_event bev;
-       char filename[PATH_MAX];
-       u64 limit = offset + size;
-
-       while (offset < limit) {
-               ssize_t len;
-
-               if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
-                       return -1;
-
-               if (header->needs_swap)
-                       perf_event_header__bswap(&old_bev.header);
-
-               len = old_bev.header.size - sizeof(old_bev);
-               if (read(input, filename, len) != len)
-                       return -1;
-
-               bev.header = old_bev.header;
-
-               /*
-                * As the pid is the missing value, we need to fill
-                * it properly. The header.misc value give us nice hint.
-                */
-               bev.pid = HOST_KERNEL_ID;
-               if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
-                   bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
-                       bev.pid = DEFAULT_GUEST_KERNEL_ID;
-
-               memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
-               __event_process_build_id(&bev, filename, session);
-
-               offset += bev.header.size;
-       }
-
-       return 0;
-}
-
-static int perf_header__read_build_ids(struct perf_header *header,
-                                      int input, u64 offset, u64 size)
-{
-       struct perf_session *session = container_of(header, struct perf_session, header);
-       struct build_id_event bev;
-       char filename[PATH_MAX];
-       u64 limit = offset + size, orig_offset = offset;
-       int err = -1;
-
-       while (offset < limit) {
-               ssize_t len;
-
-               if (read(input, &bev, sizeof(bev)) != sizeof(bev))
-                       goto out;
-
-               if (header->needs_swap)
-                       perf_event_header__bswap(&bev.header);
-
-               len = bev.header.size - sizeof(bev);
-               if (read(input, filename, len) != len)
-                       goto out;
-               /*
-                * The a1645ce1 changeset:
-                *
-                * "perf: 'perf kvm' tool for monitoring guest performance from host"
-                *
-                * Added a field to struct build_id_event that broke the file
-                * format.
-                *
-                * Since the kernel build-id is the first entry, process the
-                * table using the old format if the well known
-                * '[kernel.kallsyms]' string for the kernel build-id has the
-                * first 4 characters chopped off (where the pid_t sits).
-                */
-               if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
-                       if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
-                               return -1;
-                       return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
-               }
-
-               __event_process_build_id(&bev, filename, session);
-
-               offset += bev.header.size;
-       }
-       err = 0;
-out:
-       return err;
-}
-
 static int perf_file_section__process(struct perf_file_section *section,
                                      struct perf_header *ph,
                                      int feat, int fd, void *data __used)
@@ -1854,40 +2004,32 @@ static int perf_file_section__process(struct perf_file_section *section,
                return 0;
        }
 
-       switch (feat) {
-       case HEADER_TRACE_INFO:
-               trace_report(fd, false);
-               break;
-       case HEADER_BUILD_ID:
-               if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
-                       pr_debug("Failed to read buildids, continuing...\n");
-               break;
-       default:
-               break;
-       }
+       if (!feat_ops[feat].process)
+               return 0;
 
-       return 0;
+       return feat_ops[feat].process(section, ph, feat, fd);
 }
 
 static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
                                       struct perf_header *ph, int fd,
                                       bool repipe)
 {
-       if (readn(fd, header, sizeof(*header)) <= 0 ||
-           memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
-               return -1;
+       int ret;
 
-       if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
+       ret = readn(fd, header, sizeof(*header));
+       if (ret <= 0)
                return -1;
 
-       if (header->size != sizeof(*header)) {
-               u64 size = bswap_64(header->size);
+       if (check_magic_endian(header->magic, header->size, true, ph) < 0) {
+               pr_debug("endian/magic failed\n");
+               return -1;
+       }
 
-               if (size != sizeof(*header))
-                       return -1;
+       if (ph->needs_swap)
+               header->size = bswap_64(header->size);
 
-               ph->needs_swap = true;
-       }
+       if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
+               return -1;
 
        return 0;
 }
@@ -1908,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd)
        return 0;
 }
 
+/* Read one on-file perf_file_attr from @fd into @f_attr, honouring the attr
+ * size recorded in the file.  Returns 0 on success, -1 on any failure. */
+static int read_attr(int fd, struct perf_header *ph,
+                    struct perf_file_attr *f_attr)
+{
+       struct perf_event_attr *attr = &f_attr->attr;
+       size_t sz, left, our_sz = sizeof(f_attr->attr);
+       int ret;
+
+       memset(f_attr, 0, sizeof(*f_attr));
+
+       /* read minimal guaranteed structure */
+       ret = readn(fd, attr, PERF_ATTR_SIZE_VER0);
+       if (ret <= 0) {
+               pr_debug("cannot read %d bytes of header attr\n",
+                        PERF_ATTR_SIZE_VER0);
+               return -1;
+       }
+
+       /* on file perf_event_attr size */
+       sz = attr->size;
+       if (ph->needs_swap)
+               sz = bswap_32(sz);
+
+       if (sz == 0) {
+               /* assume ABI0 */
+               sz = PERF_ATTR_SIZE_VER0;
+       } else if (sz < PERF_ATTR_SIZE_VER0) {
+               /* would wrap 'left' below and overrun f_attr */
+               pr_debug("bogus attr size %zu, smaller than ABI0\n", sz);
+               return -1;
+       } else if (sz > our_sz) {
+               pr_debug("file uses a more recent and unsupported ABI"
+                        " (%zu bytes extra)\n", sz - our_sz);
+               return -1;
+       }
+       /* what we have not yet read and that we know about */
+       left = sz - PERF_ATTR_SIZE_VER0;
+       if (left && readn(fd, (void *)attr + PERF_ATTR_SIZE_VER0, left) <= 0)
+               return -1;
+
+       /* read perf_file_section, ids are read in caller */
+       ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids));
+       return ret <= 0 ? -1 : 0;
+}
+
 int perf_session__read_header(struct perf_session *session, int fd)
 {
        struct perf_header *header = &session->header;
@@ -1923,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd)
        if (session->fd_pipe)
                return perf_header__read_pipe(session, fd);
 
-       if (perf_file_header__read(&f_header, header, fd) < 0) {
-               pr_debug("incompatible file format\n");
+       if (perf_file_header__read(&f_header, header, fd) < 0)
                return -EINVAL;
-       }
 
-       nr_attrs = f_header.attrs.size / sizeof(f_attr);
+       nr_attrs = f_header.attrs.size / f_header.attr_size;
        lseek(fd, f_header.attrs.offset, SEEK_SET);
 
        for (i = 0; i < nr_attrs; i++) {
                struct perf_evsel *evsel;
                off_t tmp;
 
-               if (readn(fd, &f_attr, sizeof(f_attr)) <= 0)
+               if (read_attr(fd, header, &f_attr) < 0)
                        goto out_errno;
 
                if (header->needs_swap)
index ac4ec95..21a6be0 100644 (file)
@@ -11,6 +11,7 @@
 
 enum {
        HEADER_RESERVED         = 0,    /* always cleared */
+       HEADER_FIRST_FEATURE    = 1,
        HEADER_TRACE_INFO       = 1,
        HEADER_BUILD_ID,
 
@@ -26,7 +27,7 @@ enum {
        HEADER_EVENT_DESC,
        HEADER_CPU_TOPOLOGY,
        HEADER_NUMA_TOPOLOGY,
-
+       HEADER_BRANCH_STACK,
        HEADER_LAST_FEATURE,
        HEADER_FEAT_BITS        = 256,
 };
index 6f505d1..3dc99a9 100644 (file)
@@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists)
                hists__set_col_len(hists, col, 0);
 }
 
+static void hists__set_unres_dso_col_len(struct hists *hists, int dso)
+{
+       const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
+
+       if (hists__col_len(hists, dso) < unresolved_col_width &&
+           !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
+           !symbol_conf.dso_list)
+               hists__set_col_len(hists, dso, unresolved_col_width);
+}
+
 static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 {
+       const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
        u16 len;
 
        if (h->ms.sym)
-               hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen);
-       else {
-               const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
-
-               if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width &&
-                   !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
-                   !symbol_conf.dso_list)
-                       hists__set_col_len(hists, HISTC_DSO,
-                                          unresolved_col_width);
-       }
+               hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4);
+       else
+               hists__set_unres_dso_col_len(hists, HISTC_DSO);
 
        len = thread__comm_len(h->thread);
        if (hists__new_col_len(hists, HISTC_COMM, len))
@@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
                len = dso__name_len(h->ms.map->dso);
                hists__new_col_len(hists, HISTC_DSO, len);
        }
+
+       if (h->branch_info) {
+               int symlen;
+               /*
+                * +4 accounts for '[x] ' priv level info
+                * +2 accounts for the 0x prefix on raw addresses
+                */
+               if (h->branch_info->from.sym) {
+                       symlen = (int)h->branch_info->from.sym->namelen + 4;
+                       hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+
+                       symlen = dso__name_len(h->branch_info->from.map->dso);
+                       hists__new_col_len(hists, HISTC_DSO_FROM, symlen);
+               } else {
+                       symlen = unresolved_col_width + 4 + 2;
+                       hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+                       hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM);
+               }
+
+               if (h->branch_info->to.sym) {
+                       symlen = (int)h->branch_info->to.sym->namelen + 4;
+                       hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+
+                       symlen = dso__name_len(h->branch_info->to.map->dso);
+                       hists__new_col_len(hists, HISTC_DSO_TO, symlen);
+               } else {
+                       symlen = unresolved_col_width + 4 + 2;
+                       hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+                       hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
+               }
+       }
 }
 
 static void hist_entry__add_cpumode_period(struct hist_entry *he,
@@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent)
        return 0;
 }
 
-struct hist_entry *__hists__add_entry(struct hists *hists,
+static struct hist_entry *add_hist_entry(struct hists *hists,
+                                     struct hist_entry *entry,
                                      struct addr_location *al,
-                                     struct symbol *sym_parent, u64 period)
+                                     u64 period)
 {
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct hist_entry *he;
-       struct hist_entry entry = {
-               .thread = al->thread,
-               .ms = {
-                       .map    = al->map,
-                       .sym    = al->sym,
-               },
-               .cpu    = al->cpu,
-               .ip     = al->addr,
-               .level  = al->level,
-               .period = period,
-               .parent = sym_parent,
-               .filtered = symbol__parent_filter(sym_parent),
-       };
        int cmp;
 
        pthread_mutex_lock(&hists->lock);
@@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
                parent = *p;
                he = rb_entry(parent, struct hist_entry, rb_node_in);
 
-               cmp = hist_entry__cmp(&entry, he);
+               cmp = hist_entry__cmp(entry, he);
 
                if (!cmp) {
                        he->period += period;
@@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
                        p = &(*p)->rb_right;
        }
 
-       he = hist_entry__new(&entry);
+       he = hist_entry__new(entry);
        if (!he)
                goto out_unlock;
 
@@ -252,6 +275,51 @@ out_unlock:
        return he;
 }
 
+struct hist_entry *__hists__add_branch_entry(struct hists *self,
+                                            struct addr_location *al,
+                                            struct symbol *sym_parent,
+                                            struct branch_info *bi,
+                                            u64 period)
+{
+       struct hist_entry entry = {
+               .thread = al->thread,
+               .ms = {
+                       .map    = bi->to.map,
+                       .sym    = bi->to.sym,
+               },
+               .cpu    = al->cpu,
+               .ip     = bi->to.addr,
+               .level  = al->level,
+               .period = period,
+               .parent = sym_parent,
+               .filtered = symbol__parent_filter(sym_parent),
+               .branch_info = bi,
+       };
+
+       return add_hist_entry(self, &entry, al, period);
+}
+
+struct hist_entry *__hists__add_entry(struct hists *self,
+                                     struct addr_location *al,
+                                     struct symbol *sym_parent, u64 period)
+{
+       struct hist_entry entry = {
+               .thread = al->thread,
+               .ms = {
+                       .map    = al->map,
+                       .sym    = al->sym,
+               },
+               .cpu    = al->cpu,
+               .ip     = al->addr,
+               .level  = al->level,
+               .period = period,
+               .parent = sym_parent,
+               .filtered = symbol__parent_filter(sym_parent),
+       };
+
+       return add_hist_entry(self, &entry, al, period);
+}
+
 int64_t
 hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -768,7 +836,7 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s,
                                                     sep ? "%.2f" : "   %6.2f%%",
                                                     (period * 100.0) / total);
                else
-                       ret = snprintf(s, size, sep ? "%.2f" : "   %6.2f%%",
+                       ret = scnprintf(s, size, sep ? "%.2f" : "   %6.2f%%",
                                       (period * 100.0) / total);
                if (symbol_conf.show_cpu_utilization) {
                        ret += percent_color_snprintf(s + ret, size - ret,
@@ -791,20 +859,20 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s,
                        }
                }
        } else
-               ret = snprintf(s, size, sep ? "%" PRIu64 : "%12" PRIu64 " ", period);
+               ret = scnprintf(s, size, sep ? "%" PRIu64 : "%12" PRIu64 " ", period);
 
        if (symbol_conf.show_nr_samples) {
                if (sep)
-                       ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events);
+                       ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events);
                else
-                       ret += snprintf(s + ret, size - ret, "%11" PRIu64, nr_events);
+                       ret += scnprintf(s + ret, size - ret, "%11" PRIu64, nr_events);
        }
 
        if (symbol_conf.show_total_period) {
                if (sep)
-                       ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period);
+                       ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period);
                else
-                       ret += snprintf(s + ret, size - ret, " %12" PRIu64, period);
+                       ret += scnprintf(s + ret, size - ret, " %12" PRIu64, period);
        }
 
        if (pair_hists) {
@@ -819,25 +887,25 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s,
                diff = new_percent - old_percent;
 
                if (fabs(diff) >= 0.01)
-                       snprintf(bf, sizeof(bf), "%+4.2F%%", diff);
+                       ret += scnprintf(bf, sizeof(bf), "%+4.2F%%", diff);
                else
-                       snprintf(bf, sizeof(bf), " ");
+                       ret += scnprintf(bf, sizeof(bf), " ");
 
                if (sep)
-                       ret += snprintf(s + ret, size - ret, "%c%s", *sep, bf);
+                       ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf);
                else
-                       ret += snprintf(s + ret, size - ret, "%11.11s", bf);
+                       ret += scnprintf(s + ret, size - ret, "%11.11s", bf);
 
                if (show_displacement) {
                        if (displacement)
-                               snprintf(bf, sizeof(bf), "%+4ld", displacement);
+                               ret += scnprintf(bf, sizeof(bf), "%+4ld", displacement);
                        else
-                               snprintf(bf, sizeof(bf), " ");
+                               ret += scnprintf(bf, sizeof(bf), " ");
 
                        if (sep)
-                               ret += snprintf(s + ret, size - ret, "%c%s", *sep, bf);
+                               ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf);
                        else
-                               ret += snprintf(s + ret, size - ret, "%6.6s", bf);
+                               ret += scnprintf(s + ret, size - ret, "%6.6s", bf);
                }
        }
 
@@ -855,7 +923,7 @@ int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size,
                if (se->elide)
                        continue;
 
-               ret += snprintf(s + ret, size - ret, "%s", sep ?: "  ");
+               ret += scnprintf(s + ret, size - ret, "%s", sep ?: "  ");
                ret += se->se_snprintf(he, s + ret, size - ret,
                                       hists__col_len(hists, se->se_width_idx));
        }
index f55f0a8..9413f3e 100644 (file)
@@ -32,6 +32,7 @@ struct events_stats {
        u32 nr_unknown_events;
        u32 nr_invalid_chains;
        u32 nr_unknown_id;
+       u32 nr_unprocessable_samples;
 };
 
 enum hist_column {
@@ -41,6 +42,11 @@ enum hist_column {
        HISTC_COMM,
        HISTC_PARENT,
        HISTC_CPU,
+       HISTC_MISPREDICT,
+       HISTC_SYMBOL_FROM,
+       HISTC_SYMBOL_TO,
+       HISTC_DSO_FROM,
+       HISTC_DSO_TO,
        HISTC_NR_COLS, /* Last entry */
 };
 
@@ -55,6 +61,7 @@ struct hists {
        u64                     nr_entries;
        const struct thread     *thread_filter;
        const struct dso        *dso_filter;
+       const char              *uid_filter_str;
        pthread_mutex_t         lock;
        struct events_stats     stats;
        u64                     event_stream;
@@ -72,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
                         struct hists *hists);
 void hist_entry__free(struct hist_entry *);
 
+struct hist_entry *__hists__add_branch_entry(struct hists *self,
+                                            struct addr_location *al,
+                                            struct symbol *sym_parent,
+                                            struct branch_info *bi,
+                                            u64 period);
+
 void hists__output_resort(struct hists *self);
 void hists__output_resort_threaded(struct hists *hists);
 void hists__collapse_resort(struct hists *self);
index bb4198e..afe3819 100644 (file)
@@ -2,10 +2,12 @@
 #ifndef PERF_DWARF2_H
 #define PERF_DWARF2_H
 
-/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */
 
 #define CFI_STARTPROC
 #define CFI_ENDPROC
+#define CFI_REMEMBER_STATE
+#define CFI_RESTORE_STATE
 
 #endif /* PERF_DWARF2_H */
 
diff --git a/tools/perf/util/include/asm/unistd_32.h b/tools/perf/util/include/asm/unistd_32.h
new file mode 100644 (file)
index 0000000..8b13789
--- /dev/null
@@ -0,0 +1 @@
+
diff --git a/tools/perf/util/include/asm/unistd_64.h b/tools/perf/util/include/asm/unistd_64.h
new file mode 100644 (file)
index 0000000..8b13789
--- /dev/null
@@ -0,0 +1 @@
+
index eda4416..bb162e4 100644 (file)
@@ -5,6 +5,8 @@
 #include <linux/bitops.h>
 
 int __bitmap_weight(const unsigned long *bitmap, int bits);
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+                const unsigned long *bitmap2, int bits);
 
 #define BITMAP_LAST_WORD_MASK(nbits)                                   \
 (                                                                      \
@@ -32,4 +34,13 @@ static inline int bitmap_weight(const unsigned long *src, int nbits)
        return __bitmap_weight(src, nbits);
 }
 
+static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
+                            const unsigned long *src2, int nbits)
+{
+       if (small_const_nbits(nbits))
+               *dst = *src1 | *src2;
+       else
+               __bitmap_or(dst, src1, src2, nbits);
+}
+
 #endif /* _PERF_BITOPS_H */
index 316aa0a..dea6d1c 100644 (file)
@@ -212,6 +212,21 @@ size_t map__fprintf(struct map *self, FILE *fp)
                       self->start, self->end, self->pgoff, self->dso->name);
 }
 
+/* Print @map's DSO name to @fp; "[unknown]" when no usable name exists. */
+size_t map__fprintf_dsoname(struct map *map, FILE *fp)
+{
+       const char *dsoname = "[unknown]";      /* fallback for every path */
+
+       if (map && map->dso && (map->dso->name || map->dso->long_name)) {
+               if (symbol_conf.show_kernel_path && map->dso->long_name)
+                       dsoname = map->dso->long_name;
+               else if (map->dso->name)
+                       dsoname = map->dso->name;
+       }
+
+       return fprintf(fp, "%s", dsoname);
+}
+
 /*
  * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
  * map->dso->adjust_symbols==1 for ET_EXEC-like cases.
index 2b8017f..b100c20 100644 (file)
@@ -118,6 +118,7 @@ void map__delete(struct map *self);
 struct map *map__clone(struct map *self);
 int map__overlap(struct map *l, struct map *r);
 size_t map__fprintf(struct map *self, FILE *fp);
+size_t map__fprintf_dsoname(struct map *map, FILE *fp);
 
 int map__load(struct map *self, symbol_filter_t filter);
 struct symbol *map__find_symbol(struct map *self,
index b029296..c7a6f6f 100644 (file)
@@ -165,7 +165,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
        struct tracepoint_path *path = NULL;
        DIR *sys_dir, *evt_dir;
        struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
-       char id_buf[4];
+       char id_buf[24];
        int fd;
        u64 id;
        char evt_path[MAXPATHLEN];
index e33554a..8a8ee64 100644 (file)
@@ -34,7 +34,6 @@
 
 #include "util.h"
 #include "event.h"
-#include "string.h"
 #include "strlist.h"
 #include "debug.h"
 #include "cache.h"
@@ -273,10 +272,10 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
 /* Try to find perf_probe_event with debuginfo */
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
                                          struct probe_trace_event **tevs,
-                                         int max_tevs, const char *module)
+                                         int max_tevs, const char *target)
 {
        bool need_dwarf = perf_probe_event_need_dwarf(pev);
-       struct debuginfo *dinfo = open_debuginfo(module);
+       struct debuginfo *dinfo = open_debuginfo(target);
        int ntevs, ret = 0;
 
        if (!dinfo) {
@@ -295,9 +294,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 
        if (ntevs > 0) {        /* Succeeded to find trace events */
                pr_debug("find %d probe_trace_events.\n", ntevs);
-               if (module)
+               if (target)
                        ret = add_module_to_probe_trace_events(*tevs, ntevs,
-                                                              module);
+                                                              target);
                return ret < 0 ? ret : ntevs;
        }
 
@@ -1729,7 +1728,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
        }
 
        ret = 0;
-       printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":");
+       printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
        for (i = 0; i < ntevs; i++) {
                tev = &tevs[i];
                if (pev->event)
@@ -1784,7 +1783,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 
        if (ret >= 0) {
                /* Show how to use the event. */
-               printf("\nYou can now use it on all perf tools, such as:\n\n");
+               printf("\nYou can now use it in all perf tools, such as:\n\n");
                printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group,
                         tev->event);
        }
@@ -1796,14 +1795,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 
 static int convert_to_probe_trace_events(struct perf_probe_event *pev,
                                          struct probe_trace_event **tevs,
-                                         int max_tevs, const char *module)
+                                         int max_tevs, const char *target)
 {
        struct symbol *sym;
        int ret = 0, i;
        struct probe_trace_event *tev;
 
        /* Convert perf_probe_event with debuginfo */
-       ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module);
+       ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target);
        if (ret != 0)
                return ret;     /* Found in debuginfo or got an error */
 
@@ -1819,8 +1818,8 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
                goto error;
        }
 
-       if (module) {
-               tev->point.module = strdup(module);
+       if (target) {
+               tev->point.module = strdup(target);
                if (tev->point.module == NULL) {
                        ret = -ENOMEM;
                        goto error;
@@ -1890,7 +1889,7 @@ struct __event_package {
 };
 
 int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
-                         int max_tevs, const char *module, bool force_add)
+                         int max_tevs, const char *target, bool force_add)
 {
        int i, j, ret;
        struct __event_package *pkgs;
@@ -1913,7 +1912,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
                ret  = convert_to_probe_trace_events(pkgs[i].pev,
                                                     &pkgs[i].tevs,
                                                     max_tevs,
-                                                    module);
+                                                    target);
                if (ret < 0)
                        goto end;
                pkgs[i].ntevs = ret;
@@ -1965,7 +1964,7 @@ static int __del_trace_probe_event(int fd, struct str_node *ent)
                goto error;
        }
 
-       printf("Remove event: %s\n", ent->s);
+       printf("Removed event: %s\n", ent->s);
        return 0;
 error:
        pr_warning("Failed to delete event: %s\n", strerror(-ret));
@@ -2069,7 +2068,7 @@ static int filter_available_functions(struct map *map __unused,
        return 1;
 }
 
-int show_available_funcs(const char *module, struct strfilter *_filter)
+int show_available_funcs(const char *target, struct strfilter *_filter)
 {
        struct map *map;
        int ret;
@@ -2080,9 +2079,9 @@ int show_available_funcs(const char *module, struct strfilter *_filter)
        if (ret < 0)
                return ret;
 
-       map = kernel_get_module_map(module);
+       map = kernel_get_module_map(target);
        if (!map) {
-               pr_err("Failed to find %s map.\n", (module) ? : "kernel");
+               pr_err("Failed to find %s map.\n", (target) ? : "kernel");
                return -EINVAL;
        }
        available_func_filter = _filter;
index 74bd2e6..2cc162d 100644 (file)
@@ -30,7 +30,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
-#include <ctype.h>
 #include <dwarf-regs.h>
 
 #include <linux/bitops.h>
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
new file mode 100644 (file)
index 0000000..2884e67
--- /dev/null
@@ -0,0 +1,19 @@
+#
+# List of files needed by perf python extension
+#
+# Each source file must be placed on its own line so that it can be
+# processed by Makefile and util/setup.py accordingly.
+#
+
+util/python.c
+util/ctype.c
+util/evlist.c
+util/evsel.c
+util/cpumap.c
+util/thread_map.c
+util/util.c
+util/xyarray.c
+util/cgroup.c
+util/debugfs.c
+util/strlist.c
+../../lib/rbtree.c
index 9dd47a4..e03b58a 100644 (file)
@@ -425,14 +425,14 @@ struct pyrf_thread_map {
 static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
                                 PyObject *args, PyObject *kwargs)
 {
-       static char *kwlist[] = { "pid", "tid", NULL };
-       int pid = -1, tid = -1;
+       static char *kwlist[] = { "pid", "tid", "uid", NULL };
+       int pid = -1, tid = -1, uid = UINT_MAX;
 
-       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii",
-                                        kwlist, &pid, &tid))
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii",
+                                        kwlist, &pid, &tid, &uid))
                return -1;
 
-       pthreads->threads = thread_map__new(pid, tid);
+       pthreads->threads = thread_map__new(pid, tid, uid);
        if (pthreads->threads == NULL)
                return -1;
        return 0;
index 0b2a487..c2623c6 100644 (file)
@@ -24,7 +24,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <ctype.h>
 #include <errno.h>
 
 #include "../../perf.h"
index b5ca255..002ebbf 100644 (file)
@@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force)
                self->fd = STDIN_FILENO;
 
                if (perf_session__read_header(self, self->fd) < 0)
-                       pr_err("incompatible file format");
+                       pr_err("incompatible file format (rerun with -v to learn more)");
 
                return 0;
        }
@@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force)
        }
 
        if (perf_session__read_header(self, self->fd) < 0) {
-               pr_err("incompatible file format");
+               pr_err("incompatible file format (rerun with -v to learn more)");
                goto out_close;
        }
 
@@ -229,6 +229,64 @@ static bool symbol__match_parent_regex(struct symbol *sym)
        return 0;
 }
 
+static const u8 cpumodes[] = {         /* tried in this order */
+       PERF_RECORD_MISC_USER,
+       PERF_RECORD_MISC_KERNEL,
+       PERF_RECORD_MISC_GUEST_USER,
+       PERF_RECORD_MISC_GUEST_KERNEL
+};
+#define NCPUMODES (sizeof(cpumodes) / sizeof(cpumodes[0]))
+
+static void ip__resolve_ams(struct machine *self, struct thread *thread,
+                           struct addr_map_symbol *ams,
+                           u64 ip)
+{
+       struct addr_location al;
+       size_t idx;
+       u8 mode;
+
+       memset(&al, 0, sizeof(al));
+
+       for (idx = 0; idx < NCPUMODES; idx++) {
+               mode = cpumodes[idx];
+               /*
+                * The header.misc hint cannot tell us whether a branch
+                * stack address is user, kernel, guest or hypervisor:
+                * branches may straddle those boundaries. So try each
+                * cpumode in turn until a symbol resolves; if none does,
+                * the symbol is unknown.
+                */
+               thread__find_addr_location(thread, self, mode, MAP__FUNCTION,
+                               ip, &al, NULL);
+               if (al.sym)
+                       break;
+       }
+
+       ams->addr = ip;
+       ams->al_addr = al.addr;
+       ams->sym = al.sym;
+       ams->map = al.map;
+}
+
+struct branch_info *machine__resolve_bstack(struct machine *self,
+                                           struct thread *thr,
+                                           struct branch_stack *bs)
+{
+       struct branch_info *bi = calloc(bs->nr, sizeof(*bi));
+       unsigned int n;
+
+       /* One zeroed entry per branch; NULL when calloc() fails. */
+       if (bi == NULL)
+               return NULL;
+
+       for (n = 0; n < bs->nr; n++) {
+               ip__resolve_ams(self, thr, &bi[n].to, bs->entries[n].to);
+               ip__resolve_ams(self, thr, &bi[n].from, bs->entries[n].from);
+               bi[n].flags = bs->entries[n].flags;
+       }
+       return bi;
+}
+
 int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
                               struct thread *thread,
                               struct ip_callchain *chain,
@@ -697,6 +755,18 @@ static void callchain__printf(struct perf_sample *sample)
                       i, sample->callchain->ips[i]);
 }
 
+static void branch_stack__printf(struct perf_sample *sample)
+{
+       struct branch_stack *bs = sample->branch_stack;
+       uint64_t idx;
+
+       printf("... branch stack: nr:%" PRIu64 "\n", bs->nr);
+       for (idx = 0; idx < bs->nr; idx++)      /* one "from -> to" per line */
+               printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n",
+                       idx, bs->entries[idx].from,
+                       bs->entries[idx].to);
+}
+
 static void perf_session__print_tstamp(struct perf_session *session,
                                       union perf_event *event,
                                       struct perf_sample *sample)
@@ -744,6 +814,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event,
 
        if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
                callchain__printf(sample);
+
+       if (session->sample_type & PERF_SAMPLE_BRANCH_STACK)
+               branch_stack__printf(sample);
 }
 
 static struct machine *
@@ -796,6 +869,10 @@ static int perf_session_deliver_event(struct perf_session *session,
                        ++session->hists.stats.nr_unknown_id;
                        return -1;
                }
+               if (machine == NULL) {
+                       ++session->hists.stats.nr_unprocessable_samples;
+                       return -1;
+               }
                return tool->sample(tool, event, sample, evsel, machine);
        case PERF_RECORD_MMAP:
                return tool->mmap(tool, event, sample, machine);
@@ -964,6 +1041,12 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
                            session->hists.stats.nr_invalid_chains,
                            session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
        }
+
+       if (session->hists.stats.nr_unprocessable_samples != 0) {
+               ui__warning("%u unprocessable samples recorded.\n"
+                           "Do you have a KVM guest running and not using 'perf kvm'?\n",
+                           session->hists.stats.nr_unprocessable_samples);
+       }
 }
 
 #define session_done() (*(volatile int *)(&session_done))
@@ -1293,10 +1376,9 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
 
 void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
                          struct machine *machine, struct perf_evsel *evsel,
-                         int print_sym, int print_dso)
+                         int print_sym, int print_dso, int print_symoffset)
 {
        struct addr_location al;
-       const char *symname, *dsoname;
        struct callchain_cursor *cursor = &evsel->hists.callchain_cursor;
        struct callchain_cursor_node *node;
 
@@ -1324,20 +1406,13 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
 
                        printf("\t%16" PRIx64, node->ip);
                        if (print_sym) {
-                               if (node->sym && node->sym->name)
-                                       symname = node->sym->name;
-                               else
-                                       symname = "";
-
-                               printf(" %s", symname);
+                               printf(" ");
+                               symbol__fprintf_symname(node->sym, stdout);
                        }
                        if (print_dso) {
-                               if (node->map && node->map->dso && node->map->dso->name)
-                                       dsoname = node->map->dso->name;
-                               else
-                                       dsoname = "";
-
-                               printf(" (%s)", dsoname);
+                               printf(" (");
+                               map__fprintf_dsoname(al.map, stdout);
+                               printf(")");
                        }
                        printf("\n");
 
@@ -1347,21 +1422,18 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
        } else {
                printf("%16" PRIx64, sample->ip);
                if (print_sym) {
-                       if (al.sym && al.sym->name)
-                               symname = al.sym->name;
+                       printf(" ");
+                       if (print_symoffset)
+                               symbol__fprintf_symname_offs(al.sym, &al,
+                                                            stdout);
                        else
-                               symname = "";
-
-                       printf(" %s", symname);
+                               symbol__fprintf_symname(al.sym, stdout);
                }
 
                if (print_dso) {
-                       if (al.map && al.map->dso && al.map->dso->name)
-                               dsoname = al.map->dso->name;
-                       else
-                               dsoname = "";
-
-                       printf(" (%s)", dsoname);
+                       printf(" (");
+                       map__fprintf_dsoname(al.map, stdout);
+                       printf(")");
                }
        }
 }
index 37bc383..7a5434c 100644 (file)
@@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel
                                    struct ip_callchain *chain,
                                    struct symbol **parent);
 
+struct branch_info *machine__resolve_bstack(struct machine *self,
+                                           struct thread *thread,
+                                           struct branch_stack *bs);
+
 bool perf_session__has_traces(struct perf_session *self, const char *msg);
 
 void mem_bswap_64(void *src, int byte_size);
@@ -147,7 +151,7 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
 
 void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
                          struct machine *machine, struct perf_evsel *evsel,
-                         int print_sym, int print_dso);
+                         int print_sym, int print_dso, int print_symoffset);
 
 int perf_session__cpu_bitmap(struct perf_session *session,
                             const char *cpu_list, unsigned long *cpu_bitmap);
index 36d4c56..d0f9f29 100644 (file)
@@ -24,11 +24,11 @@ cflags += getenv('CFLAGS', '').split()
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
 
+ext_sources = [f.strip() for f in file('util/python-ext-sources')
+                               if len(f.strip()) > 0 and f[0] != '#']
+
 perf = Extension('perf',
-                 sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c',
-                            'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
-                            'util/util.c', 'util/xyarray.c', 'util/cgroup.c',
-                            'util/debugfs.c'],
+                 sources = ext_sources,
                  include_dirs = ['util/include'],
                  extra_compile_args = cflags,
                  )
index 16da30d..a272374 100644 (file)
@@ -8,6 +8,7 @@ const char      default_sort_order[] = "comm,dso,symbol";
 const char     *sort_order = default_sort_order;
 int            sort__need_collapse = 0;
 int            sort__has_parent = 0;
+int            sort__branch_mode = -1; /* -1 = means not set */
 
 enum sort_type sort__first_dimension;
 
@@ -33,6 +34,9 @@ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
                }
        }
        va_end(ap);
+
+       if (n >= (int)size)
+               return size - 1;
        return n;
 }
 
@@ -94,6 +98,26 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
        return repsep_snprintf(bf, size, "%*s", width, self->thread->comm);
 }
 
+static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
+{
+       struct dso *lhs = NULL, *rhs = NULL;
+       const char *name_l, *name_r;
+
+       if (map_l)
+               lhs = map_l->dso;
+       if (map_r)
+               rhs = map_r->dso;
+
+       if (!lhs || !rhs)
+               return cmp_null(lhs, rhs);
+
+       /* verbose compares long names, otherwise short names */
+       name_l = verbose ? lhs->long_name : lhs->short_name;
+       name_r = verbose ? rhs->long_name : rhs->short_name;
+
+       return strcmp(name_l, name_r);
+}
+
 struct sort_entry sort_comm = {
        .se_header      = "Command",
        .se_cmp         = sort__comm_cmp,
@@ -107,36 +131,74 @@ struct sort_entry sort_comm = {
 static int64_t
 sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-       struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL;
-       struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL;
-       const char *dso_name_l, *dso_name_r;
+       return _sort__dso_cmp(left->ms.map, right->ms.map);
+}
 
-       if (!dso_l || !dso_r)
-               return cmp_null(dso_l, dso_r);
 
-       if (verbose) {
-               dso_name_l = dso_l->long_name;
-               dso_name_r = dso_r->long_name;
-       } else {
-               dso_name_l = dso_l->short_name;
-               dso_name_r = dso_r->short_name;
+static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r,
+                             u64 ip_l, u64 ip_r)
+{
+       if (!sym_l || !sym_r)
+               return cmp_null(sym_l, sym_r);
+
+       if (sym_l == sym_r)
+               return 0;
+
+       /* Both symbols are non-NULL here, so always order by symbol start
+        * address; the ip_l/ip_r values passed in are overwritten. */
+       ip_l = sym_l->start;
+       ip_r = sym_r->start;
+
+       return (int64_t)(ip_r - ip_l);
+}
+
+static int _hist_entry__dso_snprintf(struct map *map, char *bf,
+                                    size_t size, unsigned int width)
+{
+       if (map && map->dso) {
+               const char *dso_name = !verbose ? map->dso->short_name :
+                       map->dso->long_name;
+               return repsep_snprintf(bf, size, "%-*s", width, dso_name);
        }
 
-       return strcmp(dso_name_l, dso_name_r);
+       return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
 }
 
 static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
                                    size_t size, unsigned int width)
 {
-       if (self->ms.map && self->ms.map->dso) {
-               const char *dso_name = !verbose ? self->ms.map->dso->short_name :
-                                                 self->ms.map->dso->long_name;
-               return repsep_snprintf(bf, size, "%-*s", width, dso_name);
+       return _hist_entry__dso_snprintf(self->ms.map, bf, size, width);
+}
+
+static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
+                                    u64 ip, char level, char *bf, size_t size,
+                                    unsigned int width __used)
+{
+       size_t ret = 0;
+
+       if (verbose) {
+               char o = map ? dso__symtab_origin(map->dso) : '!';
+               ret += repsep_snprintf(bf, size, "%-#*llx %c ",
+                                      BITS_PER_LONG / 4, ip, o);
        }
 
-       return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
+       ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
+       if (sym)
+               ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
+                                      (int)(width - ret), /* '*' takes int */
+                                      sym->name);
+       else {
+               int len = BITS_PER_LONG / 4;    /* '.*' takes int */
+               ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
+                                      len, ip);
+               ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
+                                      (int)(width - ret), "");
+       }
+
+       return ret;
 }
 
+
 struct sort_entry sort_dso = {
        .se_header      = "Shared Object",
        .se_cmp         = sort__dso_cmp,
@@ -144,8 +206,14 @@ struct sort_entry sort_dso = {
        .se_width_idx   = HISTC_DSO,
 };
 
-/* --sort symbol */
+static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
+                                   size_t size, unsigned int width __used)
+{
+       return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip,
+                                        self->level, bf, size, width);
+}
 
+/* --sort symbol */
 static int64_t
 sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -163,31 +231,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
        ip_l = left->ms.sym->start;
        ip_r = right->ms.sym->start;
 
-       return (int64_t)(ip_r - ip_l);
-}
-
-static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
-                                   size_t size, unsigned int width __used)
-{
-       size_t ret = 0;
-
-       if (verbose) {
-               char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
-               ret += repsep_snprintf(bf, size, "%-#*llx %c ",
-                                      BITS_PER_LONG / 4, self->ip, o);
-       }
-
-       if (!sort_dso.elide)
-               ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level);
-
-       if (self->ms.sym)
-               ret += repsep_snprintf(bf + ret, size - ret, "%s",
-                                      self->ms.sym->name);
-       else
-               ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
-                                      BITS_PER_LONG / 4, self->ip);
-
-       return ret;
+       return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r);
 }
 
 struct sort_entry sort_sym = {
@@ -246,19 +290,155 @@ struct sort_entry sort_cpu = {
        .se_width_idx   = HISTC_CPU,
 };
 
+static int64_t
+sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return _sort__dso_cmp(left->branch_info->from.map,
+                             right->branch_info->from.map);
+}
+
+static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf,
+                                   size_t size, unsigned int width)
+{
+       return _hist_entry__dso_snprintf(self->branch_info->from.map,
+                                        bf, size, width);
+}
+
+struct sort_entry sort_dso_from = {
+       .se_header      = "Source Shared Object",
+       .se_cmp         = sort__dso_from_cmp,
+       .se_snprintf    = hist_entry__dso_from_snprintf,
+       .se_width_idx   = HISTC_DSO_FROM,
+};
+
+static int64_t
+sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return _sort__dso_cmp(left->branch_info->to.map,
+                             right->branch_info->to.map);
+}
+
+static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf,
+                                      size_t size, unsigned int width)
+{
+       return _hist_entry__dso_snprintf(self->branch_info->to.map,
+                                        bf, size, width);
+}
+
+static int64_t
+sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       struct addr_map_symbol *from_l = &left->branch_info->from;
+       struct addr_map_symbol *from_r = &right->branch_info->from;
+
+       if (!from_l->sym && !from_r->sym)
+               return right->level - left->level;
+
+       return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr,
+                            from_r->addr);
+}
+
+static int64_t
+sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       struct addr_map_symbol *to_l = &left->branch_info->to;
+       struct addr_map_symbol *to_r = &right->branch_info->to;
+
+       if (!to_l->sym && !to_r->sym)
+               return right->level - left->level;
+
+       return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr);
+}
+
+static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf,
+                                   size_t size, unsigned int width __used)
+{
+       struct addr_map_symbol *from = &self->branch_info->from;
+       return _hist_entry__sym_snprintf(from->map, from->sym, from->addr,
+                                        self->level, bf, size, width);
+
+}
+
+static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf,
+                                   size_t size, unsigned int width __used)
+{
+       struct addr_map_symbol *to = &self->branch_info->to;
+       return _hist_entry__sym_snprintf(to->map, to->sym, to->addr,
+                                        self->level, bf, size, width);
+
+}
+
+struct sort_entry sort_dso_to = {
+       .se_header      = "Target Shared Object",
+       .se_cmp         = sort__dso_to_cmp,
+       .se_snprintf    = hist_entry__dso_to_snprintf,
+       .se_width_idx   = HISTC_DSO_TO,
+};
+
+struct sort_entry sort_sym_from = {
+       .se_header      = "Source Symbol",
+       .se_cmp         = sort__sym_from_cmp,
+       .se_snprintf    = hist_entry__sym_from_snprintf,
+       .se_width_idx   = HISTC_SYMBOL_FROM,
+};
+
+struct sort_entry sort_sym_to = {
+       .se_header      = "Target Symbol",
+       .se_cmp         = sort__sym_to_cmp,
+       .se_snprintf    = hist_entry__sym_to_snprintf,
+       .se_width_idx   = HISTC_SYMBOL_TO,
+};
+
+static int64_t
+sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       const unsigned char mp = left->branch_info->flags.mispred !=
+                                       right->branch_info->flags.mispred;
+       const unsigned char p = left->branch_info->flags.predicted !=
+                                       right->branch_info->flags.predicted;
+
+       return mp || p; /* 0 if both flags match, else 1 (never negative) */
+}
+
+static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf,
+                                   size_t size, unsigned int width){
+       const char *out = "N/A";        /* reset on every call */
+
+       if (self->branch_info->flags.predicted)
+               out = "N";
+       else if (self->branch_info->flags.mispred)
+               out = "Y";
+
+       return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_mispredict = {
+       .se_header      = "Branch Mispredicted",
+       .se_cmp         = sort__mispredict_cmp,
+       .se_snprintf    = hist_entry__mispredict_snprintf,
+       .se_width_idx   = HISTC_MISPREDICT,
+};
+
 struct sort_dimension {
        const char              *name;
        struct sort_entry       *entry;
        int                     taken;
 };
 
+#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
+
 static struct sort_dimension sort_dimensions[] = {
-       { .name = "pid",        .entry = &sort_thread,  },
-       { .name = "comm",       .entry = &sort_comm,    },
-       { .name = "dso",        .entry = &sort_dso,     },
-       { .name = "symbol",     .entry = &sort_sym,     },
-       { .name = "parent",     .entry = &sort_parent,  },
-       { .name = "cpu",        .entry = &sort_cpu,     },
+       DIM(SORT_PID, "pid", sort_thread),
+       DIM(SORT_COMM, "comm", sort_comm),
+       DIM(SORT_DSO, "dso", sort_dso),
+       DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
+       DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
+       DIM(SORT_SYM, "symbol", sort_sym),
+       DIM(SORT_SYM_TO, "symbol_from", sort_sym_from),
+       DIM(SORT_SYM_FROM, "symbol_to", sort_sym_to),
+       DIM(SORT_PARENT, "parent", sort_parent),
+       DIM(SORT_CPU, "cpu", sort_cpu),
+       DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
 };
 
 int sort_dimension__add(const char *tok)
@@ -270,7 +450,6 @@ int sort_dimension__add(const char *tok)
 
                if (strncasecmp(tok, sd->name, strlen(tok)))
                        continue;
-
                if (sd->entry == &sort_parent) {
                        int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
                        if (ret) {
@@ -302,6 +481,16 @@ int sort_dimension__add(const char *tok)
                                sort__first_dimension = SORT_PARENT;
                        else if (!strcmp(sd->name, "cpu"))
                                sort__first_dimension = SORT_CPU;
+                       else if (!strcmp(sd->name, "symbol_from"))
+                               sort__first_dimension = SORT_SYM_FROM;
+                       else if (!strcmp(sd->name, "symbol_to"))
+                               sort__first_dimension = SORT_SYM_TO;
+                       else if (!strcmp(sd->name, "dso_from"))
+                               sort__first_dimension = SORT_DSO_FROM;
+                       else if (!strcmp(sd->name, "dso_to"))
+                               sort__first_dimension = SORT_DSO_TO;
+                       else if (!strcmp(sd->name, "mispredict"))
+                               sort__first_dimension = SORT_MISPREDICT;
                }
 
                list_add_tail(&sd->entry->list, &hist_entry__sort_list);
@@ -309,7 +498,6 @@ int sort_dimension__add(const char *tok)
 
                return 0;
        }
-
        return -ESRCH;
 }
 
index 3f67ae3..472aa5a 100644 (file)
@@ -31,11 +31,16 @@ extern const char *parent_pattern;
 extern const char default_sort_order[];
 extern int sort__need_collapse;
 extern int sort__has_parent;
+extern int sort__branch_mode;
 extern char *field_sep;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
 extern struct sort_entry sort_sym;
 extern struct sort_entry sort_parent;
+extern struct sort_entry sort_dso_from;
+extern struct sort_entry sort_dso_to;
+extern struct sort_entry sort_sym_from;
+extern struct sort_entry sort_sym_to;
 extern enum sort_type sort__first_dimension;
 
 /**
@@ -72,6 +77,7 @@ struct hist_entry {
                struct hist_entry *pair;
                struct rb_root    sorted_chain;
        };
+       struct branch_info      *branch_info;
        struct callchain_root   callchain[0];
 };
 
@@ -82,6 +88,11 @@ enum sort_type {
        SORT_SYM,
        SORT_PARENT,
        SORT_CPU,
+       SORT_DSO_FROM,
+       SORT_DSO_TO,
+       SORT_SYM_FROM,
+       SORT_SYM_TO,
+       SORT_MISPREDICT,
 };
 
 /*
index 92e0685..2eeb51b 100644 (file)
@@ -1,4 +1,5 @@
 #include "cache.h"
+#include <linux/kernel.h>
 
 int prefixcmp(const char *str, const char *prefix)
 {
@@ -89,14 +90,14 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
        if (!strbuf_avail(sb))
                strbuf_grow(sb, 64);
        va_start(ap, fmt);
-       len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+       len = vscnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
        va_end(ap);
        if (len < 0)
-               die("your vsnprintf is broken");
+               die("your vscnprintf is broken");
        if (len > strbuf_avail(sb)) {
                strbuf_grow(sb, len);
                va_start(ap, fmt);
-               len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+               len = vscnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
                va_end(ap);
                if (len > strbuf_avail(sb)) {
                        die("this should not happen, your snprintf is broken");
index 0975438..5dd83c3 100644 (file)
@@ -1,4 +1,3 @@
-#include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
 #include <libgen.h>
@@ -12,6 +11,7 @@
 #include <unistd.h>
 #include <inttypes.h>
 #include "build-id.h"
+#include "util.h"
 #include "debug.h"
 #include "symbol.h"
 #include "strlist.h"
@@ -263,6 +263,28 @@ static size_t symbol__fprintf(struct symbol *sym, FILE *fp)
                       sym->name);
 }
 
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+                                   const struct addr_location *al, FILE *fp)
+{
+       unsigned long offset;
+       size_t length;
+
+       /* No symbol, or no name for it: print a placeholder. */
+       if (!sym || !sym->name)
+               return fprintf(fp, "[unknown]");
+       length = fprintf(fp, "%s", sym->name);
+       if (al) {
+               offset = al->addr - sym->start;
+               length += fprintf(fp, "+0x%lx", offset);
+       }
+       return length;
+}
+
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
+{
+       return symbol__fprintf_symname_offs(sym, NULL, fp);
+}
+
 void dso__set_long_name(struct dso *dso, char *name)
 {
        if (name == NULL)
index 123c2e1..ac49ef2 100644 (file)
@@ -5,6 +5,7 @@
 #include <stdbool.h>
 #include <stdint.h>
 #include "map.h"
+#include "../perf.h"
 #include <linux/list.h>
 #include <linux/rbtree.h>
 #include <stdio.h>
@@ -70,6 +71,7 @@ struct symbol_conf {
        unsigned short  priv_size;
        unsigned short  nr_events;
        bool            try_vmlinux_path,
+                       show_kernel_path,
                        use_modules,
                        sort_by_name,
                        show_nr_samples,
@@ -95,7 +97,11 @@ struct symbol_conf {
                        *col_width_list_str;
        struct strlist  *dso_list,
                        *comm_list,
-                       *sym_list;
+                       *sym_list,
+                       *dso_from_list,
+                       *dso_to_list,
+                       *sym_from_list,
+                       *sym_to_list;
        const char      *symfs;
 };
 
@@ -119,6 +125,19 @@ struct map_symbol {
        bool          has_children;
 };
 
+struct addr_map_symbol {
+       struct map    *map;     /* map found by the address lookup */
+       struct symbol *sym;     /* symbol found, NULL if unresolved */
+       u64           addr;     /* address as given to the lookup */
+       u64           al_addr;  /* addr_location.addr after the lookup */
+};
+
+struct branch_info {
+       struct addr_map_symbol from;    /* resolved branch source */
+       struct addr_map_symbol to;      /* resolved branch target */
+       struct branch_flags flags;      /* copied from the branch entry */
+};
+
 struct addr_location {
        struct thread *thread;
        struct map    *map;
@@ -241,6 +260,9 @@ void machines__destroy_guest_kernel_maps(struct rb_root *machines);
 
 int symbol__init(void);
 void symbol__exit(void);
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+                                   const struct addr_location *al, FILE *fp);
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
 bool symbol_type__is_a(char symbol_type, enum map_type map_type);
 
 size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c
new file mode 100644 (file)
index 0000000..48c6902
--- /dev/null
@@ -0,0 +1,60 @@
+
+#include "util.h"
+#include "sysfs.h"
+
+static const char * const sysfs_known_mountpoints[] = {
+       "/sys",
+       NULL,   /* sentinel */
+};
+
+static int sysfs_found;        /* set once sysfs_mountpoint is valid */
+char sysfs_mountpoint[PATH_MAX + 1]; /* +1: "%<PATH_MAX>s" adds a NUL */
+
+static int sysfs_valid_mountpoint(const char *sysfs)
+{
+       struct statfs st_fs;
+
+       /* Reject paths that cannot be statfs()ed or are not sysfs. */
+       if (statfs(sysfs, &st_fs) < 0 ||
+           st_fs.f_type != (long) SYSFS_MAGIC)
+               return -ENOENT;
+
+       return 0;
+}
+
+const char *sysfs_find_mountpoint(void)
+{
+       const char * const *ptr;
+       char type[100];         /* filled by "%99s" below */
+       FILE *fp;
+
+       if (sysfs_found)        /* result cached by an earlier call */
+               return (const char *) sysfs_mountpoint;
+
+       ptr = sysfs_known_mountpoints;
+       while (*ptr) {
+               if (sysfs_valid_mountpoint(*ptr) == 0) {
+                       sysfs_found = 1;
+                       strcpy(sysfs_mountpoint, *ptr);
+                       return sysfs_mountpoint;
+               }
+               ptr++;
+       }
+
+       /* no known mountpoint is sysfs: fall back to parsing /proc/mounts */
+       fp = fopen("/proc/mounts", "r");
+       if (fp == NULL)
+               return NULL;
+
+       while (!sysfs_found &&
+              fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+                     sysfs_mountpoint, type) == 2) {
+               /* 2nd field -> sysfs_mountpoint, 3rd -> type; rest skipped */
+               if (strcmp(type, "sysfs") == 0)
+                       sysfs_found = 1;
+       }
+
+       fclose(fp);
+
+       return sysfs_found ? sysfs_mountpoint : NULL;
+}
diff --git a/tools/perf/util/sysfs.h b/tools/perf/util/sysfs.h
new file mode 100644 (file)
index 0000000..a813b72
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef __SYSFS_H__
+#define __SYSFS_H__
+
+const char *sysfs_find_mountpoint(void);
+
+#endif /* __SYSFS_H__ */
index a5df131..84d9bd7 100644 (file)
@@ -1,6 +1,13 @@
 #include <dirent.h>
+#include <limits.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "strlist.h"
+#include <string.h>
 #include "thread_map.h"
 
 /* Skip "." and ".." directories */
@@ -23,7 +30,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
        sprintf(name, "/proc/%d/task", pid);
        items = scandir(name, &namelist, filter, NULL);
        if (items <= 0)
-                return NULL;
+               return NULL;
 
        threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
        if (threads != NULL) {
@@ -51,14 +58,240 @@ struct thread_map *thread_map__new_by_tid(pid_t tid)
        return threads;
 }
 
-struct thread_map *thread_map__new(pid_t pid, pid_t tid)
+/*
+ * Build a thread map holding every thread of every process under /proc
+ * whose /proc/<pid> directory is owned by @uid.
+ *
+ * Returns a malloc()ed map (release it with thread_map__delete()), or NULL
+ * if memory runs out, /proc cannot be opened, or the task directory of a
+ * matching process cannot be scanned.
+ */
+struct thread_map *thread_map__new_by_uid(uid_t uid)
+{
+       DIR *proc;
+       int max_threads = 32, items, i;
+       char path[256];
+       struct dirent *dirent, **namelist = NULL;
+       struct thread_map *threads = malloc(sizeof(*threads) +
+                                           max_threads * sizeof(pid_t));
+       if (threads == NULL)
+               goto out;
+
+       proc = opendir("/proc");
+       if (proc == NULL)
+               goto out_free_threads;
+
+       threads->nr = 0;
+
+       /*
+        * readdir() rather than the deprecated readdir_r(): the stream is
+        * private to this function, and a caller-supplied struct dirent is
+        * not guaranteed to be large enough for every name.
+        */
+       while ((dirent = readdir(proc)) != NULL) {
+               char *end;
+               bool grow = false;
+               struct stat st;
+               pid_t pid = strtol(dirent->d_name, &end, 10);
+
+               if (*end) /* only interested in proper numerical dirents */
+                       continue;
+
+               snprintf(path, sizeof(path), "/proc/%s", dirent->d_name);
+
+               if (stat(path, &st) != 0)
+                       continue;
+
+               if (st.st_uid != uid)
+                       continue;
+
+               snprintf(path, sizeof(path), "/proc/%d/task", pid);
+               items = scandir(path, &namelist, filter, NULL);
+               if (items <= 0)
+                       goto out_free_closedir;
+
+               /* double the capacity until the new entries fit */
+               while (threads->nr + items >= max_threads) {
+                       max_threads *= 2;
+                       grow = true;
+               }
+
+               if (grow) {
+                       struct thread_map *tmp;
+
+                       tmp = realloc(threads, (sizeof(*threads) +
+                                               max_threads * sizeof(pid_t)));
+                       if (tmp == NULL)
+                               goto out_free_namelist;
+
+                       threads = tmp;
+               }
+
+               for (i = 0; i < items; i++)
+                       threads->map[threads->nr + i] = atoi(namelist[i]->d_name);
+
+               for (i = 0; i < items; i++)
+                       free(namelist[i]);
+               free(namelist);
+
+               threads->nr += items;
+       }
+
+out_closedir:
+       closedir(proc);
+out:
+       return threads;
+
+out_free_threads:
+       free(threads);
+       return NULL;
+
+out_free_namelist:
+       for (i = 0; i < items; i++)
+               free(namelist[i]);
+       free(namelist);
+
+out_free_closedir:
+       free(threads);
+       threads = NULL;
+       goto out_closedir;
+}
+
+/*
+ * Create a thread map from numeric selectors, in order of precedence:
+ * @pid when it is not -1, then @uid when @tid is -1 and @uid is not
+ * UINT_MAX, otherwise @tid.
+ */
+struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
 {
        if (pid != -1)
                return thread_map__new_by_pid(pid);
+
+       if (tid == -1 && uid != UINT_MAX)
+               return thread_map__new_by_uid(uid);
+
        return thread_map__new_by_tid(tid);
 }
 
+/*
+ * Build a thread map from the process ids listed in @pid_str (split into
+ * entries by strlist__new()), adding every thread found under
+ * /proc/<pid>/task for each of them.
+ *
+ * Returns a malloc()ed map, or NULL if the list cannot be created, an
+ * entry is not a valid number, a task directory cannot be scanned, or
+ * memory runs out.
+ */
+static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
+{
+       struct thread_map *threads = NULL, *nt;
+       char name[256];
+       int items, total_tasks = 0;
+       struct dirent **namelist = NULL;
+       int i, j = 0;
+       pid_t pid, prev_pid = INT_MAX;
+       char *end_ptr;
+       struct str_node *pos;
+       struct strlist *slist = strlist__new(false, pid_str);
+
+       if (!slist)
+               return NULL;
+
+       strlist__for_each(pos, slist) {
+               pid = strtol(pos->s, &end_ptr, 10);
+
+               if (pid == INT_MIN || pid == INT_MAX ||
+                   (*end_ptr != '\0' && *end_ptr != ','))
+                       goto out_free_threads;
+
+               /* skip an id that repeats the one handled just before it */
+               if (pid == prev_pid)
+                       continue;
+               prev_pid = pid;
+
+               snprintf(name, sizeof(name), "/proc/%d/task", pid);
+               items = scandir(name, &namelist, filter, NULL);
+               if (items <= 0)
+                       goto out_free_threads;
+
+               total_tasks += items;
+               nt = realloc(threads, (sizeof(*threads) +
+                                      sizeof(pid_t) * total_tasks));
+               if (nt == NULL)
+                       goto out_free_namelist;
+
+               threads = nt;
+
+               for (i = 0; i < items; i++) {
+                       threads->map[j++] = atoi(namelist[i]->d_name);
+                       free(namelist[i]);
+               }
+               threads->nr = total_tasks;
+               free(namelist);
+       }
+
+out:
+       strlist__delete(slist);
+       return threads;
+
+out_free_namelist:
+       /* realloc() failed: the scandir() result is still ours to release */
+       for (i = 0; i < items; i++)
+               free(namelist[i]);
+       free(namelist);
+
+out_free_threads:
+       free(threads);
+       threads = NULL;
+       goto out;
+}
+
+/*
+ * Build a thread map from the thread ids listed in @tid_str (split into
+ * entries by strlist__new()).
+ *
+ * A NULL @tid_str yields a one-entry map whose only tid is -1.
+ *
+ * Returns a malloc()ed map, or NULL if the list cannot be created, an
+ * entry is not a valid number, or memory runs out.
+ */
+static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
+{
+       struct thread_map *threads = NULL, *nt;
+       int ntasks = 0;
+       pid_t tid, prev_tid = INT_MAX;
+       char *end_ptr;
+       struct str_node *pos;
+       struct strlist *slist;
+
+       /* perf-stat expects threads to be generated even if tid not given */
+       if (!tid_str) {
+               threads = malloc(sizeof(*threads) + sizeof(pid_t));
+               if (threads != NULL) {
+                       threads->map[0] = -1;
+                       threads->nr     = 1;
+               }
+               return threads;
+       }
+
+       slist = strlist__new(false, tid_str);
+       if (!slist)
+               return NULL;
+
+       strlist__for_each(pos, slist) {
+               tid = strtol(pos->s, &end_ptr, 10);
+
+               if (tid == INT_MIN || tid == INT_MAX ||
+                   (*end_ptr != '\0' && *end_ptr != ','))
+                       goto out_free_threads;
+
+               /* skip an id that repeats the one handled just before it */
+               if (tid == prev_tid)
+                       continue;
+               prev_tid = tid;
+
+               ntasks++;
+               nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks);
+
+               if (nt == NULL)
+                       goto out_free_threads;
+
+               threads = nt;
+               threads->map[ntasks - 1] = tid;
+               threads->nr              = ntasks;
+       }
+out:
+       /* the list is only needed while parsing; release it on every path */
+       strlist__delete(slist);
+       return threads;
+
+out_free_threads:
+       free(threads);
+       threads = NULL;
+       goto out;
+}
+
+/*
+ * Create a thread map from string selectors, in order of precedence:
+ * @pid when non-NULL, then @uid when @tid is NULL and @uid is not
+ * UINT_MAX, otherwise @tid (which may be NULL).
+ */
+struct thread_map *thread_map__new_str(const char *pid, const char *tid,
+                                      uid_t uid)
+{
+       struct thread_map *map;
+
+       if (pid != NULL)
+               map = thread_map__new_by_pid_str(pid);
+       else if (tid == NULL && uid != UINT_MAX)
+               map = thread_map__new_by_uid(uid);
+       else
+               map = thread_map__new_by_tid_str(tid);
+
+       return map;
+}
+
 void thread_map__delete(struct thread_map *threads)
 {
        free(threads);
 }
+
+/*
+ * Print "<nr> thread(s): <tid>, <tid>, ..." followed by a newline to @fp.
+ *
+ * Returns the sum of the values returned by the fprintf() calls.
+ */
+size_t thread_map__fprintf(struct thread_map *threads, FILE *fp)
+{
+       const char *plural = threads->nr > 1 ? "s" : "";
+       const char *sep = "";
+       size_t total;
+       int idx = 0;
+
+       total = fprintf(fp, "%d thread%s: ", threads->nr, plural);
+
+       while (idx < threads->nr) {
+               total += fprintf(fp, "%s%d", sep, threads->map[idx]);
+               sep = ", ";     /* every entry after the first is separated */
+               idx++;
+       }
+
+       total += fprintf(fp, "\n");
+       return total;
+}
index 3cb9073..7da80f1 100644 (file)
@@ -2,6 +2,7 @@
 #define __PERF_THREAD_MAP_H
 
 #include <sys/types.h>
+#include <stdio.h>
 
 struct thread_map {
        int nr;
@@ -10,6 +11,14 @@ struct thread_map {
 
 struct thread_map *thread_map__new_by_pid(pid_t pid);
 struct thread_map *thread_map__new_by_tid(pid_t tid);
-struct thread_map *thread_map__new(pid_t pid, pid_t tid);
+struct thread_map *thread_map__new_by_uid(uid_t uid);
+struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+
+struct thread_map *thread_map__new_str(const char *pid,
+               const char *tid, uid_t uid);
+
 void thread_map__delete(struct thread_map *threads);
+
+size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
+
 #endif /* __PERF_THREAD_MAP_H */
index 500471d..09fe579 100644 (file)
@@ -69,12 +69,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
 
        ret += SNPRINTF(bf + ret, size - ret, "], ");
 
-       if (top->target_pid != -1)
-               ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d",
+       if (top->target_pid)
+               ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s",
                                top->target_pid);
-       else if (top->target_tid != -1)
-               ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d",
+       else if (top->target_tid)
+               ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
                                top->target_tid);
+       else if (top->uid_str != NULL)
+               ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
+                               top->uid_str);
        else
                ret += SNPRINTF(bf + ret, size - ret, " (all");
 
@@ -82,7 +85,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
                ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
                                top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list);
        else {
-               if (top->target_tid != -1)
+               if (top->target_tid)
                        ret += SNPRINTF(bf + ret, size - ret, ")");
                else
                        ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
index a248f3c..ce61cb2 100644 (file)
@@ -23,7 +23,8 @@ struct perf_top {
        u64                guest_us_samples, guest_kernel_samples;
        int                print_entries, count_filter, delay_secs;
        int                freq;
-       pid_t              target_pid, target_tid;
+       const char         *target_pid, *target_tid;
+       uid_t              uid;
        bool               hide_kernel_symbols, hide_user_symbols, zero;
        bool               system_wide;
        bool               use_tui, use_stdio;
@@ -33,7 +34,8 @@ struct perf_top {
        bool               vmlinux_warned;
        bool               inherit;
        bool               group;
-       bool               sample_id_all_avail;
+       bool               sample_id_all_missing;
+       bool               exclude_guest_missing;
        bool               dump_symtab;
        const char         *cpu_list;
        struct hist_entry  *sym_filter_entry;
@@ -45,6 +47,7 @@ struct perf_top {
        int                realtime_prio;
        int                sym_pcnt_filter;
        const char         *sym_filter;
+       const char         *uid_str;
 };
 
 size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
index 1a8d4dc..a4088ce 100644 (file)
@@ -25,7 +25,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <ctype.h>
 #include <errno.h>
 
 #include "../perf.h"
@@ -1424,6 +1423,11 @@ static long long arg_num_eval(struct print_arg *arg)
                                die("unknown op '%s'", arg->op.op);
                        }
                        break;
+               case '+':
+                       left = arg_num_eval(arg->op.left);
+                       right = arg_num_eval(arg->op.right);
+                       val = left + right;
+                       break;
                default:
                        die("unknown op '%s'", arg->op.op);
                }
@@ -1484,6 +1488,13 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok)
 
                free_token(token);
                type = process_arg(event, arg, &token);
+
+               if (type == EVENT_OP)
+                       type = process_op(event, arg, &token);
+
+               if (type == EVENT_ERROR)
+                       goto out_free;
+
                if (test_type_token(type, token, EVENT_DELIM, ","))
                        goto out_free;
 
index f55cc3a..b9592e0 100644 (file)
@@ -33,7 +33,6 @@
 #include <pthread.h>
 #include <fcntl.h>
 #include <unistd.h>
-#include <ctype.h>
 #include <errno.h>
 
 #include "../perf.h"
index a3fdf55..18ae6c1 100644 (file)
@@ -22,7 +22,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <ctype.h>
 #include <errno.h>
 
 #include "../perf.h"
index 295a9c9..57a4c6e 100644 (file)
@@ -69,14 +69,17 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
        if (!self->navkeypressed)
                width += 1;
 
+       if (!ab->hide_src_code && ol->offset != -1)
+               if (!current_entry || (self->use_navkeypressed &&
+                                      !self->navkeypressed))
+                       ui_browser__set_color(self, HE_COLORSET_CODE);
+
        if (!*ol->line)
                slsmg_write_nstring(" ", width - 18);
        else
                slsmg_write_nstring(ol->line, width - 18);
 
-       if (!current_entry)
-               ui_browser__set_color(self, HE_COLORSET_CODE);
-       else
+       if (current_entry)
                ab->selection = ol;
 }
 
@@ -230,9 +233,9 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx,
        struct rb_node *nd = NULL;
        struct map_symbol *ms = self->b.priv;
        struct symbol *sym = ms->sym;
-       const char *help = "<-, ESC: exit, TAB/shift+TAB: cycle hottest lines, "
-                          "H: Hottest, -> Line action, S -> Toggle source "
-                          "code view";
+       const char *help = "<-/ESC: Exit, TAB/shift+TAB: Cycle hot lines, "
+                          "H: Go to hottest line, ->/ENTER: Line action, "
+                          "S: Toggle source code view";
        int key;
 
        if (ui_browser__show(&self->b, sym->name, help) < 0)
@@ -284,9 +287,11 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx,
                                nd = self->curr_hot;
                        break;
                case 'H':
+               case 'h':
                        nd = self->curr_hot;
                        break;
                case 'S':
+               case 's':
                        if (annotate_browser__toggle_source(self))
                                ui_helpline__puts(help);
                        continue;
@@ -338,6 +343,7 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx,
                                pthread_mutex_unlock(&notes->lock);
                                symbol__tui_annotate(target, ms->map, evidx,
                                                     timer, arg, delay_secs);
+                               ui_browser__show_title(&self->b, sym->name);
                        }
                        continue;
                case K_LEFT:
index e81aef1..fa530fc 100644 (file)
@@ -805,8 +805,11 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
                self->hists = hists;
                self->b.refresh = hist_browser__refresh;
                self->b.seek = ui_browser__hists_seek;
-               self->b.use_navkeypressed = true,
-               self->has_symbols = sort_sym.list.next != NULL;
+               self->b.use_navkeypressed = true;
+               if (sort__branch_mode == 1)
+                       self->has_symbols = sort_sym_from.list.next != NULL;
+               else
+                       self->has_symbols = sort_sym.list.next != NULL;
        }
 
        return self;
@@ -837,19 +840,32 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
        unsigned long nr_events = self->stats.nr_events[PERF_RECORD_SAMPLE];
 
        nr_events = convert_unit(nr_events, &unit);
-       printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name);
+       printed = scnprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name);
 
-       if (thread)
+       if (self->uid_filter_str)
                printed += snprintf(bf + printed, size - printed,
+                                   ", UID: %s", self->uid_filter_str);
+       if (thread)
+               printed += scnprintf(bf + printed, size - printed,
                                    ", Thread: %s(%d)",
                                    (thread->comm_set ? thread->comm : ""),
                                    thread->pid);
        if (dso)
-               printed += snprintf(bf + printed, size - printed,
+               printed += scnprintf(bf + printed, size - printed,
                                    ", DSO: %s", dso->short_name);
        return printed;
 }
 
+/*
+ * Release the first @n entries of @options and reset each slot to NULL so
+ * that a later call on the same array does not free them again.
+ * Nothing is done when @n is zero or negative.
+ */
+static inline void free_popup_options(char **options, int n)
+{
+       int idx = 0;
+
+       while (idx < n) {
+               free(options[idx]);
+               options[idx] = NULL;
+               idx++;
+       }
+}
+
 static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                                    const char *helpline, const char *ev_name,
                                    bool left_exits,
@@ -858,7 +874,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 {
        struct hists *self = &evsel->hists;
        struct hist_browser *browser = hist_browser__new(self);
+       struct branch_info *bi;
        struct pstack *fstack;
+       char *options[16];
+       int nr_options = 0;
        int key = -1;
 
        if (browser == NULL)
@@ -870,13 +889,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
        ui_helpline__push(helpline);
 
+       memset(options, 0, sizeof(options));
+
        while (1) {
                const struct thread *thread = NULL;
                const struct dso *dso = NULL;
-               char *options[16];
-               int nr_options = 0, choice = 0, i,
+               int choice = 0,
                    annotate = -2, zoom_dso = -2, zoom_thread = -2,
-                   browse_map = -2;
+                   annotate_f = -2, annotate_t = -2, browse_map = -2;
+
+               nr_options = 0;
 
                key = hist_browser__run(browser, ev_name, timer, arg, delay_secs);
 
@@ -884,7 +906,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                        thread = hist_browser__selected_thread(browser);
                        dso = browser->selection->map ? browser->selection->map->dso : NULL;
                }
-
                switch (key) {
                case K_TAB:
                case K_UNTAB:
@@ -899,7 +920,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                        if (!browser->has_symbols) {
                                ui_browser__warning(&browser->b, delay_secs * 2,
                        "Annotation is only available for symbolic views, "
-                       "include \"sym\" in --sort to use it.");
+                       "include \"sym*\" in --sort to use it.");
                                continue;
                        }
 
@@ -969,12 +990,34 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                if (!browser->has_symbols)
                        goto add_exit_option;
 
-               if (browser->selection != NULL &&
-                   browser->selection->sym != NULL &&
-                   !browser->selection->map->dso->annotate_warned &&
-                   asprintf(&options[nr_options], "Annotate %s",
-                            browser->selection->sym->name) > 0)
-                       annotate = nr_options++;
+               if (sort__branch_mode == 1) {
+                       bi = browser->he_selection->branch_info;
+                       if (browser->selection != NULL &&
+                           bi &&
+                           bi->from.sym != NULL &&
+                           !bi->from.map->dso->annotate_warned &&
+                               asprintf(&options[nr_options], "Annotate %s",
+                                        bi->from.sym->name) > 0)
+                               annotate_f = nr_options++;
+
+                       if (browser->selection != NULL &&
+                           bi &&
+                           bi->to.sym != NULL &&
+                           !bi->to.map->dso->annotate_warned &&
+                           (bi->to.sym != bi->from.sym ||
+                            bi->to.map->dso != bi->from.map->dso) &&
+                               asprintf(&options[nr_options], "Annotate %s",
+                                        bi->to.sym->name) > 0)
+                               annotate_t = nr_options++;
+               } else {
+
+                       if (browser->selection != NULL &&
+                           browser->selection->sym != NULL &&
+                           !browser->selection->map->dso->annotate_warned &&
+                               asprintf(&options[nr_options], "Annotate %s",
+                                        browser->selection->sym->name) > 0)
+                               annotate = nr_options++;
+               }
 
                if (thread != NULL &&
                    asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
@@ -995,25 +1038,39 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                        browse_map = nr_options++;
 add_exit_option:
                options[nr_options++] = (char *)"Exit";
-
+retry_popup_menu:
                choice = ui__popup_menu(nr_options, options);
 
-               for (i = 0; i < nr_options - 1; ++i)
-                       free(options[i]);
-
                if (choice == nr_options - 1)
                        break;
 
-               if (choice == -1)
+               if (choice == -1) {
+                       free_popup_options(options, nr_options - 1);
                        continue;
+               }
 
-               if (choice == annotate) {
+               if (choice == annotate || choice == annotate_t || choice == annotate_f) {
                        struct hist_entry *he;
                        int err;
 do_annotate:
                        he = hist_browser__selected_entry(browser);
                        if (he == NULL)
                                continue;
+
+                       /*
+                        * we stash the branch_info symbol + map into the
+                        * ms so we don't have to rewrite all the annotation
+                        * code to use branch_info.
+                        * in branch mode, the ms struct is not used
+                        */
+                       if (choice == annotate_f) {
+                               he->ms.sym = he->branch_info->from.sym;
+                               he->ms.map = he->branch_info->from.map;
+                       }  else if (choice == annotate_t) {
+                               he->ms.sym = he->branch_info->to.sym;
+                               he->ms.map = he->branch_info->to.map;
+                       }
+
                        /*
                         * Don't let this be freed, say, by hists__decay_entry.
                         */
@@ -1021,9 +1078,18 @@ do_annotate:
                        err = hist_entry__tui_annotate(he, evsel->idx,
                                                       timer, arg, delay_secs);
                        he->used = false;
+                       /*
+                        * offer option to annotate the other branch source or target
+                        * (if they exist) when returning from annotate
+                        */
+                       if ((err == 'q' || err == CTRL('c'))
+                           && annotate_t != -2 && annotate_f != -2)
+                               goto retry_popup_menu;
+
                        ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
                        if (err)
                                ui_browser__handle_resize(&browser->b);
+
                } else if (choice == browse_map)
                        map__browse(browser->selection->map);
                else if (choice == zoom_dso) {
@@ -1069,6 +1135,7 @@ out_free_stack:
        pstack__delete(fstack);
 out:
        hist_browser__delete(browser);
+       free_popup_options(options, nr_options - 1);
        return key;
 }
 
@@ -1095,7 +1162,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
                                                       HE_COLORSET_NORMAL);
 
        nr_events = convert_unit(nr_events, &unit);
-       printed = snprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events,
+       printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events,
                           unit, unit == ' ' ? "" : " ", ev_name);
        slsmg_printf("%s", bf);
 
@@ -1105,8 +1172,8 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
                if (!current_entry)
                        ui_browser__set_color(browser, HE_COLORSET_TOP);
                nr_events = convert_unit(nr_events, &unit);
-               snprintf(bf, sizeof(bf), ": %ld%c%schunks LOST!", nr_events,
-                        unit, unit == ' ' ? "" : " ");
+               printed += scnprintf(bf, sizeof(bf), ": %ld%c%schunks LOST!",
+                                    nr_events, unit, unit == ' ' ? "" : " ");
                warn = bf;
        }
 
index 6905bcc..eca6575 100644 (file)
@@ -3,9 +3,9 @@
 #include <newt.h>
 #include <inttypes.h>
 #include <sys/ttydefaults.h>
-#include <ctype.h>
 #include <string.h>
 #include <linux/bitops.h>
+#include "../../util.h"
 #include "../../debug.h"
 #include "../../symbol.h"
 #include "../browser.h"
index 4f48f59..2f950c2 100644 (file)
@@ -64,7 +64,7 @@ int ui_helpline__show_help(const char *format, va_list ap)
        static int backlog;
 
        pthread_mutex_lock(&ui__lock);
-       ret = vsnprintf(ui_helpline__last_msg + backlog,
+       ret = vscnprintf(ui_helpline__last_msg + backlog,
                        sizeof(ui_helpline__last_msg) - backlog, format, ap);
        backlog += ret;
 
index d76d1c0..52bb07c 100644 (file)
@@ -7,6 +7,7 @@
  * Copyright (C) Linus Torvalds, 2005
  */
 #include "util.h"
+#include "debug.h"
 
 static void report(const char *prefix, const char *err, va_list params)
 {
@@ -81,3 +82,41 @@ void warning(const char *warn, ...)
        warn_routine(warn, params);
        va_end(params);
 }
+
+/*
+ * Resolve a user given as a login name or as a numeric id to a uid.
+ *
+ * Returns UINT_MAX when no user was requested (@str is NULL) or when @tid
+ * or @pid is set and overrides it, UINT_MAX - 1 when @str cannot be
+ * resolved, and otherwise the pw_uid of the matching password entry.
+ */
+uid_t parse_target_uid(const char *str, const char *tid, const char *pid)
+{
+       struct passwd pwd, *result;
+       char buf[1024];
+
+       if (str == NULL)
+               return UINT_MAX;
+
+       /* UID and PID are mutually exclusive */
+       if (tid || pid) {
+               ui__warning("PID/TID switch overriding UID\n");
+               sleep(1);
+               return UINT_MAX;
+       }
+
+       getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
+
+       /* not a known login name: try to read it as a numeric uid */
+       if (result == NULL) {
+               char *endptr;
+               int uid = strtol(str, &endptr, 10);
+
+               /*
+                * endptr == str means no digits were consumed; without this
+                * check an empty string would parse as 0 and resolve to
+                * uid 0.
+                */
+               if (endptr == str || *endptr != '\0') {
+                       ui__error("Invalid user %s\n", str);
+                       return UINT_MAX - 1;
+               }
+
+               getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
+
+               if (result == NULL) {
+                       ui__error("Problems obtaining information for user %s\n",
+                                 str);
+                       return UINT_MAX - 1;
+               }
+       }
+
+       return result->pw_uid;
+}
index 8131410..8109a90 100644 (file)
@@ -6,7 +6,7 @@
  * XXX We need to find a better place for these things...
  */
 bool perf_host  = true;
-bool perf_guest = true;
+bool perf_guest = false;
 
 void event_attr_init(struct perf_event_attr *attr)
 {
@@ -14,6 +14,8 @@ void event_attr_init(struct perf_event_attr *attr)
                attr->exclude_host  = 1;
        if (!perf_guest)
                attr->exclude_guest = 1;
+       /* to capture ABI version */
+       attr->size = sizeof(*attr);
 }
 
 int mkdir_p(char *path, mode_t mode)
index ecf9898..0f99f39 100644 (file)
@@ -199,6 +199,8 @@ static inline int has_extension(const char *filename, const char *ext)
 #undef isalpha
 #undef isprint
 #undef isalnum
+#undef islower
+#undef isupper
 #undef tolower
 #undef toupper
 
@@ -219,6 +221,8 @@ extern unsigned char sane_ctype[256];
 #define isalpha(x) sane_istest(x,GIT_ALPHA)
 #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
 #define isprint(x) sane_istest(x,GIT_PRINT)
+/*
+ * For ASCII letters the case is carried by bit 0x20 of the character
+ * itself, so that bit is tested on x directly; handing 0x20 to
+ * sane_istest() would use it as a character-class mask instead.
+ * NOTE(review): sane_istest() is defined outside this hunk - confirm that
+ * it masks the sane_ctype[] entry for x.
+ * x is expanded twice: do not pass arguments with side effects.
+ */
+#define islower(x) (sane_istest(x,GIT_ALPHA) && ((x) & 0x20))
+#define isupper(x) (sane_istest(x,GIT_ALPHA) && !((x) & 0x20))
 #define tolower(x) sane_case((unsigned char)(x), 0x20)
 #define toupper(x) sane_case((unsigned char)(x), 0)
 
@@ -245,6 +249,8 @@ struct perf_event_attr;
 
 void event_attr_init(struct perf_event_attr *attr);
 
+uid_t parse_target_uid(const char *str, const char *tid, const char *pid);
+
 #define _STR(x) #x
 #define STR(x) _STR(x)