Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging-2.6
Conflicts: Documentation/feature-removal-schedule.txt drivers/staging/bcm/headers.h drivers/staging/brcm80211/brcmfmac/dhd_linux.c drivers/staging/brcm80211/brcmfmac/dhd_sdio.c drivers/staging/brcm80211/brcmfmac/wl_cfg80211.h drivers/staging/brcm80211/brcmfmac/wl_iw.c drivers/staging/et131x/et131x_netdev.c drivers/staging/rtl8187se/ieee80211/ieee80211_softmac.c drivers/staging/rtl8192e/r8192E.h drivers/staging/usbip/userspace/src/utils.h
This commit is contained in:
commit
48d67fe035
|
@ -45,6 +45,11 @@ modules.builtin
|
|||
/Module.markers
|
||||
/Module.symvers
|
||||
|
||||
#
|
||||
# Debian directory (make deb-pkg)
|
||||
#
|
||||
/debian/
|
||||
|
||||
#
|
||||
# git files that we don't want to ignore even if they are dot-files
|
||||
#
|
||||
|
|
3
.mailmap
3
.mailmap
|
@ -73,8 +73,7 @@ Linas Vepstas <linas@austin.ibm.com>
|
|||
Mark Brown <broonie@sirena.org.uk>
|
||||
Matthieu CASTET <castet.matthieu@free.fr>
|
||||
Mayuresh Janorkar <mayur@ti.com>
|
||||
Michael Buesch <mb@bu3sch.de>
|
||||
Michael Buesch <mbuesch@freenet.de>
|
||||
Michael Buesch <m@bues.ch>
|
||||
Michel Dänzer <michel@tungstengraphics.com>
|
||||
Mitesh shah <mshah@teja.com>
|
||||
Morten Welinder <terra@gnome.org>
|
||||
|
|
|
@ -14,7 +14,7 @@ Why: /proc/<pid>/oom_adj allows userspace to influence the oom killer's
|
|||
|
||||
A much more powerful interface, /proc/<pid>/oom_score_adj, was
|
||||
introduced with the oom killer rewrite that allows users to increase or
|
||||
decrease the badness() score linearly. This interface will replace
|
||||
decrease the badness score linearly. This interface will replace
|
||||
/proc/<pid>/oom_adj.
|
||||
|
||||
A warning will be emitted to the kernel log if an application uses this
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
What: /dev/fw[0-9]+
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: linux1394-devel@lists.sourceforge.net
|
||||
Description:
|
||||
The character device files /dev/fw* are the interface between
|
||||
firewire-core and IEEE 1394 device drivers implemented in
|
||||
userspace. The ioctl(2)- and read(2)-based ABI is defined and
|
||||
documented in <linux/firewire-cdev.h>.
|
||||
|
||||
This ABI offers most of the features which firewire-core also
|
||||
exposes to kernelspace IEEE 1394 drivers.
|
||||
|
||||
Each /dev/fw* is associated with one IEEE 1394 node, which can
|
||||
be a remote or local node. Operations on a /dev/fw* file have
|
||||
different scope:
|
||||
- The 1394 node which is associated with the file:
|
||||
- Asynchronous request transmission
|
||||
- Get the Configuration ROM
|
||||
- Query node ID
|
||||
- Query maximum speed of the path between this node
|
||||
and local node
|
||||
- The 1394 bus (i.e. "card") to which the node is attached:
|
||||
- Isochronous stream transmission and reception
|
||||
- Asynchronous stream transmission and reception
|
||||
- Asynchronous broadcast request transmission
|
||||
- PHY packet transmission and reception
|
||||
- Allocate, reallocate, deallocate isochronous
|
||||
resources (channels, bandwidth) at the bus's IRM
|
||||
- Query node IDs of local node, root node, IRM, bus
|
||||
manager
|
||||
- Query cycle time
|
||||
- Bus reset initiation, bus reset event reception
|
||||
- All 1394 buses:
|
||||
- Allocation of IEEE 1212 address ranges on the local
|
||||
link layers, reception of inbound requests to such
|
||||
an address range, asynchronous response transmission
|
||||
to inbound requests
|
||||
- Addition of descriptors or directories to the local
|
||||
nodes' Configuration ROM
|
||||
|
||||
Due to the different scope of operations and in order to let
|
||||
userland implement different access permission models, some
|
||||
operations are restricted to /dev/fw* files that are associated
|
||||
with a local node:
|
||||
- Addition of descriptors or directories to the local
|
||||
nodes' Configuration ROM
|
||||
- PHY packet transmission and reception
|
||||
|
||||
A /dev/fw* file remains associated with one particular node
|
||||
during its entire life time. Bus topology changes, and hence
|
||||
node ID changes, are tracked by firewire-core. ABI users do not
|
||||
need to be aware of topology.
|
||||
|
||||
The following file operations are supported:
|
||||
|
||||
open(2)
|
||||
Currently the only useful flags are O_RDWR.
|
||||
|
||||
ioctl(2)
|
||||
Initiate various actions. Some take immediate effect, others
|
||||
are performed asynchronously while or after the ioctl returns.
|
||||
See the inline documentation in <linux/firewire-cdev.h> for
|
||||
descriptions of all ioctls.
|
||||
|
||||
poll(2), select(2), epoll_wait(2) etc.
|
||||
Watch for events to become available to be read.
|
||||
|
||||
read(2)
|
||||
Receive various events. There are solicited events like
|
||||
outbound asynchronous transaction completion or isochronous
|
||||
buffer completion, and unsolicited events such as bus resets,
|
||||
request reception, or PHY packet reception. Always use a read
|
||||
buffer which is large enough to receive the largest event that
|
||||
could ever arrive. See <linux/firewire-cdev.h> for descriptions
|
||||
of all event types and for which ioctls affect reception of
|
||||
events.
|
||||
|
||||
mmap(2)
|
||||
Allocate a DMA buffer for isochronous reception or transmission
|
||||
and map it into the process address space. The arguments should
|
||||
be used as follows: addr = NULL, length = the desired buffer
|
||||
size, i.e. number of packets times size of largest packet,
|
||||
prot = at least PROT_READ for reception and at least PROT_WRITE
|
||||
for transmission, flags = MAP_SHARED, fd = the handle to the
|
||||
/dev/fw*, offset = 0.
|
||||
|
||||
Isochronous reception works in packet-per-buffer fashion except
|
||||
for multichannel reception which works in buffer-fill mode.
|
||||
|
||||
munmap(2)
|
||||
Unmap the isochronous I/O buffer from the process address space.
|
||||
|
||||
close(2)
|
||||
Besides stopping and freeing I/O contexts that were associated
|
||||
with the file descriptor, back out any changes to the local
|
||||
nodes' Configuration ROM. Deallocate isochronous channels and
|
||||
bandwidth at the IRM that were marked for kernel-assisted
|
||||
re- and deallocation.
|
||||
|
||||
Users: libraw1394
|
||||
libdc1394
|
||||
tools like jujuutils, fwhack, ...
|
|
@ -0,0 +1,122 @@
|
|||
What: /sys/bus/firewire/devices/fw[0-9]+/
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: linux1394-devel@lists.sourceforge.net
|
||||
Description:
|
||||
IEEE 1394 node device attributes.
|
||||
Read-only. Mutable during the node device's lifetime.
|
||||
See IEEE 1212 for semantic definitions.
|
||||
|
||||
config_rom
|
||||
Contents of the Configuration ROM register.
|
||||
Binary attribute; an array of host-endian u32.
|
||||
|
||||
guid
|
||||
The node's EUI-64 in the bus information block of
|
||||
Configuration ROM.
|
||||
Hexadecimal string representation of an u64.
|
||||
|
||||
|
||||
What: /sys/bus/firewire/devices/fw[0-9]+/units
|
||||
Date: June 2009
|
||||
KernelVersion: 2.6.31
|
||||
Contact: linux1394-devel@lists.sourceforge.net
|
||||
Description:
|
||||
IEEE 1394 node device attribute.
|
||||
Read-only. Mutable during the node device's lifetime.
|
||||
See IEEE 1212 for semantic definitions.
|
||||
|
||||
units
|
||||
Summary of all units present in an IEEE 1394 node.
|
||||
Contains space-separated tuples of specifier_id and
|
||||
version of each unit present in the node. Specifier_id
|
||||
and version are hexadecimal string representations of
|
||||
u24 of the respective unit directory entries.
|
||||
Specifier_id and version within each tuple are separated
|
||||
by a colon.
|
||||
|
||||
Users: udev rules to set ownership and access permissions or ACLs of
|
||||
/dev/fw[0-9]+ character device files
|
||||
|
||||
|
||||
What: /sys/bus/firewire/devices/fw[0-9]+[.][0-9]+/
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: linux1394-devel@lists.sourceforge.net
|
||||
Description:
|
||||
IEEE 1394 unit device attributes.
|
||||
Read-only. Immutable during the unit device's lifetime.
|
||||
See IEEE 1212 for semantic definitions.
|
||||
|
||||
modalias
|
||||
Same as MODALIAS in the uevent at device creation.
|
||||
|
||||
rom_index
|
||||
Offset of the unit directory within the parent device's
|
||||
(node device's) Configuration ROM, in quadlets.
|
||||
Decimal string representation.
|
||||
|
||||
|
||||
What: /sys/bus/firewire/devices/*/
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: linux1394-devel@lists.sourceforge.net
|
||||
Description:
|
||||
Attributes common to IEEE 1394 node devices and unit devices.
|
||||
Read-only. Mutable during the node device's lifetime.
|
||||
Immutable during the unit device's lifetime.
|
||||
See IEEE 1212 for semantic definitions.
|
||||
|
||||
These attributes are only created if the root directory of an
|
||||
IEEE 1394 node or the unit directory of an IEEE 1394 unit
|
||||
actually contains according entries.
|
||||
|
||||
hardware_version
|
||||
Hexadecimal string representation of an u24.
|
||||
|
||||
hardware_version_name
|
||||
Contents of a respective textual descriptor leaf.
|
||||
|
||||
model
|
||||
Hexadecimal string representation of an u24.
|
||||
|
||||
model_name
|
||||
Contents of a respective textual descriptor leaf.
|
||||
|
||||
specifier_id
|
||||
Hexadecimal string representation of an u24.
|
||||
Mandatory in unit directories according to IEEE 1212.
|
||||
|
||||
vendor
|
||||
Hexadecimal string representation of an u24.
|
||||
Mandatory in the root directory according to IEEE 1212.
|
||||
|
||||
vendor_name
|
||||
Contents of a respective textual descriptor leaf.
|
||||
|
||||
version
|
||||
Hexadecimal string representation of an u24.
|
||||
Mandatory in unit directories according to IEEE 1212.
|
||||
|
||||
|
||||
What: /sys/bus/firewire/drivers/sbp2/fw*/host*/target*/*:*:*:*/ieee1394_id
|
||||
formerly
|
||||
/sys/bus/ieee1394/drivers/sbp2/fw*/host*/target*/*:*:*:*/ieee1394_id
|
||||
Date: Feb 2004
|
||||
KernelVersion: 2.6.4
|
||||
Contact: linux1394-devel@lists.sourceforge.net
|
||||
Description:
|
||||
SCSI target port identifier and logical unit identifier of a
|
||||
logical unit of an SBP-2 target. The identifiers are specified
|
||||
in SAM-2...SAM-4 annex A. They are persistent and world-wide
|
||||
unique properties of the SBP-2 attached target.
|
||||
|
||||
Read-only attribute, immutable during the target's lifetime.
|
||||
Format, as exposed by firewire-sbp2 since 2.6.22, May 2007:
|
||||
Colon-separated hexadecimal string representations of
|
||||
u64 EUI-64 : u24 directory_ID : u16 LUN
|
||||
without 0x prefixes, without whitespace. The former sbp2 driver
|
||||
(removed in 2.6.37 after being superseded by firewire-sbp2) used
|
||||
a somewhat shorter format which was not as close to SAM.
|
||||
|
||||
Users: udev rules to create /dev/disk/by-id/ symlinks
|
|
@ -0,0 +1,27 @@
|
|||
On some architectures, when the kernel loads any userspace program it
|
||||
maps an ELF DSO into that program's address space. This DSO is called
|
||||
the vDSO and it often contains useful and highly-optimized alternatives
|
||||
to real syscalls.
|
||||
|
||||
These functions are called just like ordinary C functions according to
|
||||
your platform's ABI. Call them from a sensible context. (For example,
|
||||
if you set CS on x86 to something strange, the vDSO functions are
|
||||
within their rights to crash.) In addition, if you pass a bad
|
||||
pointer to a vDSO function, you might get SIGSEGV instead of -EFAULT.
|
||||
|
||||
To find the DSO, parse the auxiliary vector passed to the program's
|
||||
entry point. The AT_SYSINFO_EHDR entry will point to the vDSO.
|
||||
|
||||
The vDSO uses symbol versioning; whenever you request a symbol from the
|
||||
vDSO, specify the version you are expecting.
|
||||
|
||||
Programs that dynamically link to glibc will use the vDSO automatically.
|
||||
Otherwise, you can use the reference parser in Documentation/vDSO/parse_vdso.c.
|
||||
|
||||
Unless otherwise noted, the set of symbols with any given version and the
|
||||
ABI of those symbols is considered stable. It may vary across architectures,
|
||||
though.
|
||||
|
||||
(As of this writing, this ABI documentation has been confirmed for x86_64.
|
||||
The maintainers of the other vDSO-using architectures should confirm
|
||||
that it is correct for their architecture.)
|
|
@ -0,0 +1,21 @@
|
|||
What: /sys/bus/i2c/devices/.../device
|
||||
Date: February 2011
|
||||
Contact: Minkyu Kang <mk7.kang@samsung.com>
|
||||
Description:
|
||||
show what device is attached
|
||||
NONE - no device
|
||||
USB - USB device is attached
|
||||
UART - UART is attached
|
||||
CHARGER - Charger is attached
|
||||
JIG - JIG is attached
|
||||
|
||||
What: /sys/bus/i2c/devices/.../switch
|
||||
Date: February 2011
|
||||
Contact: Minkyu Kang <mk7.kang@samsung.com>
|
||||
Description:
|
||||
show or set the state of manual switch
|
||||
VAUDIO - switch to VAUDIO path
|
||||
UART - switch to UART path
|
||||
AUDIO - switch to AUDIO path
|
||||
DHOST - switch to DHOST path
|
||||
AUTO - switch automatically by device
|
|
@ -92,6 +92,14 @@ Description: The mouse has a tracking- and a distance-control-unit. These
|
|||
This file is writeonly.
|
||||
Users: http://roccat.sourceforge.net
|
||||
|
||||
What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/talk
|
||||
Date: May 2011
|
||||
Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
|
||||
Description: Used to activate some easy* functions of the mouse from outside.
|
||||
The data has to be 16 bytes long.
|
||||
This file is writeonly.
|
||||
Users: http://roccat.sourceforge.net
|
||||
|
||||
What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/tcu
|
||||
Date: October 2010
|
||||
Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
What: /sys/bus/hid/drivers/wiimote/<dev>/led1
|
||||
What: /sys/bus/hid/drivers/wiimote/<dev>/led2
|
||||
What: /sys/bus/hid/drivers/wiimote/<dev>/led3
|
||||
What: /sys/bus/hid/drivers/wiimote/<dev>/led4
|
||||
Date: July 2011
|
||||
KernelVersion: 3.1
|
||||
Contact: David Herrmann <dh.herrmann@googlemail.com>
|
||||
Description: Make it possible to set/get current led state. Reading from it
|
||||
returns 0 if led is off and 1 if it is on. Writing 0 to it
|
||||
disables the led, writing 1 enables it.
|
|
@ -2,13 +2,7 @@ Intro
|
|||
=====
|
||||
|
||||
This document is designed to provide a list of the minimum levels of
|
||||
software necessary to run the 2.6 kernels, as well as provide brief
|
||||
instructions regarding any other "Gotchas" users may encounter when
|
||||
trying life on the Bleeding Edge. If upgrading from a pre-2.4.x
|
||||
kernel, please consult the Changes file included with 2.4.x kernels for
|
||||
additional information; most of that information will not be repeated
|
||||
here. Basically, this document assumes that your system is already
|
||||
functional and running at least 2.4.x kernels.
|
||||
software necessary to run the 3.0 kernels.
|
||||
|
||||
This document is originally based on my "Changes" file for 2.0.x kernels
|
||||
and therefore owes credit to the same people as that file (Jared Mauch,
|
||||
|
@ -22,11 +16,10 @@ Upgrade to at *least* these software revisions before thinking you've
|
|||
encountered a bug! If you're unsure what version you're currently
|
||||
running, the suggested command should tell you.
|
||||
|
||||
Again, keep in mind that this list assumes you are already
|
||||
functionally running a Linux 2.4 kernel. Also, not all tools are
|
||||
necessary on all systems; obviously, if you don't have any ISDN
|
||||
hardware, for example, you probably needn't concern yourself with
|
||||
isdn4k-utils.
|
||||
Again, keep in mind that this list assumes you are already functionally
|
||||
running a Linux kernel. Also, not all tools are necessary on all
|
||||
systems; obviously, if you don't have any ISDN hardware, for example,
|
||||
you probably needn't concern yourself with isdn4k-utils.
|
||||
|
||||
o Gnu C 3.2 # gcc --version
|
||||
o Gnu make 3.80 # make --version
|
||||
|
@ -114,12 +107,12 @@ Ksymoops
|
|||
|
||||
If the unthinkable happens and your kernel oopses, you may need the
|
||||
ksymoops tool to decode it, but in most cases you don't.
|
||||
In the 2.6 kernel it is generally preferred to build the kernel with
|
||||
CONFIG_KALLSYMS so that it produces readable dumps that can be used as-is
|
||||
(this also produces better output than ksymoops).
|
||||
If for some reason your kernel is not built with CONFIG_KALLSYMS and
|
||||
you have no way to rebuild and reproduce the Oops with that option, then
|
||||
you can still decode that Oops with ksymoops.
|
||||
It is generally preferred to build the kernel with CONFIG_KALLSYMS so
|
||||
that it produces readable dumps that can be used as-is (this also
|
||||
produces better output than ksymoops). If for some reason your kernel
|
||||
is not built with CONFIG_KALLSYMS and you have no way to rebuild and
|
||||
reproduce the Oops with that option, then you can still decode that Oops
|
||||
with ksymoops.
|
||||
|
||||
Module-Init-Tools
|
||||
-----------------
|
||||
|
@ -261,8 +254,8 @@ needs to be recompiled or (preferably) upgraded.
|
|||
NFS-utils
|
||||
---------
|
||||
|
||||
In 2.4 and earlier kernels, the nfs server needed to know about any
|
||||
client that expected to be able to access files via NFS. This
|
||||
In ancient (2.4 and earlier) kernels, the nfs server needed to know
|
||||
about any client that expected to be able to access files via NFS. This
|
||||
information would be given to the kernel by "mountd" when the client
|
||||
mounted the filesystem, or by "exportfs" at system startup. exportfs
|
||||
would take information about active clients from /var/lib/nfs/rmtab.
|
||||
|
@ -272,11 +265,11 @@ which is not always easy, particularly when trying to implement
|
|||
fail-over. Even when the system is working well, rmtab suffers from
|
||||
getting lots of old entries that never get removed.
|
||||
|
||||
With 2.6 we have the option of having the kernel tell mountd when it
|
||||
gets a request from an unknown host, and mountd can give appropriate
|
||||
export information to the kernel. This removes the dependency on
|
||||
rmtab and means that the kernel only needs to know about currently
|
||||
active clients.
|
||||
With modern kernels we have the option of having the kernel tell mountd
|
||||
when it gets a request from an unknown host, and mountd can give
|
||||
appropriate export information to the kernel. This removes the
|
||||
dependency on rmtab and means that the kernel only needs to know about
|
||||
currently active clients.
|
||||
|
||||
To enable this new functionality, you need to:
|
||||
|
||||
|
|
|
@ -680,8 +680,8 @@ ones already enabled by DEBUG.
|
|||
Chapter 14: Allocating memory
|
||||
|
||||
The kernel provides the following general purpose memory allocators:
|
||||
kmalloc(), kzalloc(), kcalloc(), and vmalloc(). Please refer to the API
|
||||
documentation for further information about them.
|
||||
kmalloc(), kzalloc(), kcalloc(), vmalloc(), and vzalloc(). Please refer to
|
||||
the API documentation for further information about them.
|
||||
|
||||
The preferred form for passing a size of a struct is the following:
|
||||
|
||||
|
|
|
@ -402,8 +402,9 @@
|
|||
!Finclude/net/mac80211.h set_key_cmd
|
||||
!Finclude/net/mac80211.h ieee80211_key_conf
|
||||
!Finclude/net/mac80211.h ieee80211_key_flags
|
||||
!Finclude/net/mac80211.h ieee80211_tkip_key_type
|
||||
!Finclude/net/mac80211.h ieee80211_get_tkip_key
|
||||
!Finclude/net/mac80211.h ieee80211_get_tkip_p1k
|
||||
!Finclude/net/mac80211.h ieee80211_get_tkip_p1k_iv
|
||||
!Finclude/net/mac80211.h ieee80211_get_tkip_p2k
|
||||
!Finclude/net/mac80211.h ieee80211_key_removed
|
||||
</chapter>
|
||||
|
||||
|
|
|
@ -409,7 +409,7 @@ cond_resched(); /* Will sleep */
|
|||
|
||||
<para>
|
||||
You should always compile your kernel
|
||||
<symbol>CONFIG_DEBUG_SPINLOCK_SLEEP</symbol> on, and it will warn
|
||||
<symbol>CONFIG_DEBUG_ATOMIC_SLEEP</symbol> on, and it will warn
|
||||
you if you break these rules. If you <emphasis>do</emphasis> break
|
||||
the rules, you will eventually lock up your box.
|
||||
</para>
|
||||
|
|
|
@ -210,7 +210,7 @@ for (i = 0; i < reqbuf.count; i++)
|
|||
<programlisting>
|
||||
&v4l2-requestbuffers; reqbuf;
|
||||
/* Our current format uses 3 planes per buffer */
|
||||
#define FMT_NUM_PLANES = 3;
|
||||
#define FMT_NUM_PLANES 3
|
||||
|
||||
struct {
|
||||
void *start[FMT_NUM_PLANES];
|
||||
|
|
|
@ -1164,7 +1164,7 @@
|
|||
}
|
||||
chip->port = pci_resource_start(pci, 0);
|
||||
if (request_irq(pci->irq, snd_mychip_interrupt,
|
||||
IRQF_SHARED, "My Chip", chip)) {
|
||||
IRQF_SHARED, KBUILD_MODNAME, chip)) {
|
||||
printk(KERN_ERR "cannot grab irq %d\n", pci->irq);
|
||||
snd_mychip_free(chip);
|
||||
return -EBUSY;
|
||||
|
@ -1197,7 +1197,7 @@
|
|||
|
||||
/* pci_driver definition */
|
||||
static struct pci_driver driver = {
|
||||
.name = "My Own Chip",
|
||||
.name = KBUILD_MODNAME,
|
||||
.id_table = snd_mychip_ids,
|
||||
.probe = snd_mychip_probe,
|
||||
.remove = __devexit_p(snd_mychip_remove),
|
||||
|
@ -1340,7 +1340,7 @@
|
|||
<programlisting>
|
||||
<![CDATA[
|
||||
if (request_irq(pci->irq, snd_mychip_interrupt,
|
||||
IRQF_SHARED, "My Chip", chip)) {
|
||||
IRQF_SHARED, KBUILD_MODNAME, chip)) {
|
||||
printk(KERN_ERR "cannot grab irq %d\n", pci->irq);
|
||||
snd_mychip_free(chip);
|
||||
return -EBUSY;
|
||||
|
@ -1616,7 +1616,7 @@
|
|||
<programlisting>
|
||||
<![CDATA[
|
||||
static struct pci_driver driver = {
|
||||
.name = "My Own Chip",
|
||||
.name = KBUILD_MODNAME,
|
||||
.id_table = snd_mychip_ids,
|
||||
.probe = snd_mychip_probe,
|
||||
.remove = __devexit_p(snd_mychip_remove),
|
||||
|
@ -5816,7 +5816,7 @@ struct _snd_pcm_runtime {
|
|||
<programlisting>
|
||||
<![CDATA[
|
||||
static struct pci_driver driver = {
|
||||
.name = "My Chip",
|
||||
.name = KBUILD_MODNAME,
|
||||
.id_table = snd_my_ids,
|
||||
.probe = snd_my_probe,
|
||||
.remove = __devexit_p(snd_my_remove),
|
||||
|
|
|
@ -5,8 +5,8 @@ Although RCU is usually used to protect read-mostly data structures,
|
|||
it is possible to use RCU to provide dynamic non-maskable interrupt
|
||||
handlers, as well as dynamic irq handlers. This document describes
|
||||
how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer
|
||||
work in "arch/i386/oprofile/nmi_timer_int.c" and in
|
||||
"arch/i386/kernel/traps.c".
|
||||
work in "arch/x86/oprofile/nmi_timer_int.c" and in
|
||||
"arch/x86/kernel/traps.c".
|
||||
|
||||
The relevant pieces of code are listed below, each followed by a
|
||||
brief explanation.
|
||||
|
|
|
@ -53,8 +53,8 @@ kernel patches.
|
|||
|
||||
12: Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT,
|
||||
CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES,
|
||||
CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP all simultaneously
|
||||
enabled.
|
||||
CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP, CONFIG_PROVE_RCU
|
||||
and CONFIG_DEBUG_OBJECTS_RCU_HEAD all simultaneously enabled.
|
||||
|
||||
13: Has been build- and runtime tested with and without CONFIG_SMP and
|
||||
CONFIG_PREEMPT.
|
||||
|
|
|
@ -164,3 +164,8 @@ In either case, the following conditions must be met:
|
|||
- The boot loader is expected to call the kernel image by jumping
|
||||
directly to the first instruction of the kernel image.
|
||||
|
||||
On CPUs supporting the ARM instruction set, the entry must be
|
||||
made in ARM state, even for a Thumb-2 kernel.
|
||||
|
||||
On CPUs supporting only the Thumb instruction set such as
|
||||
Cortex-M class CPUs, the entry must be made in Thumb state.
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
ROM-able zImage boot from eSD
|
||||
-----------------------------
|
||||
|
||||
A ROM-able zImage compiled with ZBOOT_ROM_SDHI may be written to eSD and
|
||||
SuperH Mobile ARM will boot directly from the SDHI hardware block.
|
||||
|
||||
This is achieved by the mask ROM loading the first portion of the image into
|
||||
MERAM and then jumping to it. This portion contains loader code which
|
||||
copies the entire image to SDRAM and jumps to it. From there the zImage
|
||||
boot code proceeds as normal, uncompressing the image into its final
|
||||
location and then jumping to it.
|
||||
|
||||
This code has been tested on a mackerel board using the developer 1A eSD
|
||||
boot mode which is configured using the following jumper settings.
|
||||
|
||||
8 7 6 5 4 3 2 1
|
||||
x|x|x|x| |x|x|
|
||||
S4 -+-+-+-+-+-+-+-
|
||||
| | | |x| | |x on
|
||||
|
||||
The eSD card needs to be present in SDHI slot 1 (CN7).
|
||||
As such S1 and S33 also need to be configured as per
|
||||
the notes in arch/arm/mach-shmobile/board-mackerel.c.
|
||||
|
||||
A partial zImage must be written to physical partition #1 (boot)
|
||||
of the eSD at sector 0 in vrl4 format. A utility vrl4 is supplied to
|
||||
accomplish this.
|
||||
|
||||
e.g.
|
||||
vrl4 < zImage | dd of=/dev/sdX bs=512 count=17
|
||||
|
||||
A full copy of _the same_ zImage should be written to physical partition #1
|
||||
(boot) of the eSD at sector 0. This should _not_ be in vrl4 format.
|
||||
|
||||
vrl4 < zImage | dd of=/dev/sdX bs=512
|
||||
|
||||
Note: The commands above assume that the physical partition has been
|
||||
switched. No such facility currently exists in the Linux Kernel.
|
||||
|
||||
Physical partitions are described in the eSD specification. At the time of
|
||||
writing they are not the same as partitions that are typically configured
|
||||
using fdisk and visible through /proc/partitions
|
|
@ -8,10 +8,13 @@ Introduction
|
|||
|
||||
The Samsung S3C24XX range of ARM9 System-on-Chip CPUs are supported
|
||||
by the 's3c2410' architecture of ARM Linux. Currently the S3C2410,
|
||||
S3C2412, S3C2413, S3C2416 S3C2440, S3C2442, S3C2443 and S3C2450 devices
|
||||
S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 and S3C2450 devices
|
||||
are supported.
|
||||
|
||||
Support for the S3C2400 and S3C24A0 series are in progress.
|
||||
Support for the S3C2400 and S3C24A0 series was never completed and the
|
||||
corresponding code has been removed after a while. If someone wishes to
|
||||
revive this effort, partial support can be retrieved from earlier Linux
|
||||
versions.
|
||||
|
||||
The S3C2416 and S3C2450 devices are very similar and S3C2450 support is
|
||||
included under the arch/arm/mach-s3c2416 directory. Note, whilst core
|
||||
|
|
|
@ -0,0 +1,267 @@
|
|||
Kernel-provided User Helpers
|
||||
============================
|
||||
|
||||
These are segments of kernel-provided user code reachable from user space
|
||||
at a fixed address in kernel memory. This is used to provide user space
|
||||
with some operations which require kernel help because of unimplemented
|
||||
native feature and/or instructions in many ARM CPUs. The idea is for this
|
||||
code to be executed directly in user mode for best efficiency but which is
|
||||
too intimate with the kernel counterpart to be left to user libraries.
|
||||
In fact this code might even differ from one CPU to another depending on
|
||||
the available instruction set, or whether it is an SMP system. In other
|
||||
words, the kernel reserves the right to change this code as needed without
|
||||
warning. Only the entry points and their results as documented here are
|
||||
guaranteed to be stable.
|
||||
|
||||
This is different from (but doesn't preclude) a full blown VDSO
|
||||
implementation, however a VDSO would prevent some assembly tricks with
|
||||
constants that allow for efficient branching to those code segments. And
|
||||
since those code segments only use a few cycles before returning to user
|
||||
code, the overhead of a VDSO indirect far call would add a measurable
|
||||
overhead to such minimalistic operations.
|
||||
|
||||
User space is expected to bypass those helpers and implement those things
|
||||
inline (either in the code emitted directly by the compiler, or part of
|
||||
the implementation of a library call) when optimizing for a recent enough
|
||||
processor that has the necessary native support, but only if resulting
|
||||
binaries are already to be incompatible with earlier ARM processors due to
|
||||
usage of similar native instructions for other things. In other words
|
||||
don't make binaries unable to run on earlier processors just for the sake
|
||||
of not using these kernel helpers if your compiled code is not going to
|
||||
use new instructions for other purposes.
|
||||
|
||||
New helpers may be added over time, so an older kernel may be missing some
|
||||
helpers present in a newer kernel. For this reason, programs must check
|
||||
the value of __kuser_helper_version (see below) before assuming that it is
|
||||
safe to call any particular helper. This check should ideally be
|
||||
performed only once at process startup time, and execution aborted early
|
||||
if the required helpers are not provided by the kernel version that
|
||||
process is running on.
|
||||
|
||||
kuser_helper_version
|
||||
--------------------
|
||||
|
||||
Location: 0xffff0ffc
|
||||
|
||||
Reference declaration:
|
||||
|
||||
extern int32_t __kuser_helper_version;
|
||||
|
||||
Definition:
|
||||
|
||||
This field contains the number of helpers being implemented by the
|
||||
running kernel. User space may read this to determine the availability
|
||||
of a particular helper.
|
||||
|
||||
Usage example:
|
||||
|
||||
#define __kuser_helper_version (*(int32_t *)0xffff0ffc)
|
||||
|
||||
void check_kuser_version(void)
|
||||
{
|
||||
if (__kuser_helper_version < 2) {
|
||||
fprintf(stderr, "can't do atomic operations, kernel too old\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
Notes:
|
||||
|
||||
User space may assume that the value of this field never changes
|
||||
during the lifetime of any single process. This means that this
|
||||
field can be read once during the initialisation of a library or
|
||||
startup phase of a program.
|
||||
|
||||
kuser_get_tls
|
||||
-------------
|
||||
|
||||
Location: 0xffff0fe0
|
||||
|
||||
Reference prototype:
|
||||
|
||||
void * __kuser_get_tls(void);
|
||||
|
||||
Input:
|
||||
|
||||
lr = return address
|
||||
|
||||
Output:
|
||||
|
||||
r0 = TLS value
|
||||
|
||||
Clobbered registers:
|
||||
|
||||
none
|
||||
|
||||
Definition:
|
||||
|
||||
Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
|
||||
|
||||
Usage example:
|
||||
|
||||
typedef void * (__kuser_get_tls_t)(void);
|
||||
#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
|
||||
|
||||
void foo()
|
||||
{
|
||||
void *tls = __kuser_get_tls();
|
||||
printf("TLS = %p\n", tls);
|
||||
}
|
||||
|
||||
Notes:
|
||||
|
||||
- Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
|
||||
|
||||
kuser_cmpxchg
|
||||
-------------
|
||||
|
||||
Location: 0xffff0fc0
|
||||
|
||||
Reference prototype:
|
||||
|
||||
int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
|
||||
|
||||
Input:
|
||||
|
||||
r0 = oldval
|
||||
r1 = newval
|
||||
r2 = ptr
|
||||
lr = return address
|
||||
|
||||
Output:
|
||||
|
||||
r0 = success code (zero or non-zero)
|
||||
C flag = set if r0 == 0, clear if r0 != 0
|
||||
|
||||
Clobbered registers:
|
||||
|
||||
r3, ip, flags
|
||||
|
||||
Definition:
|
||||
|
||||
Atomically store newval in *ptr only if *ptr is equal to oldval.
|
||||
Return zero if *ptr was changed or non-zero if no exchange happened.
|
||||
The C flag is also set if *ptr was changed to allow for assembly
|
||||
optimization in the calling code.
|
||||
|
||||
Usage example:
|
||||
|
||||
typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
|
||||
#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
|
||||
|
||||
int atomic_add(volatile int *ptr, int val)
|
||||
{
|
||||
int old, new;
|
||||
|
||||
do {
|
||||
old = *ptr;
|
||||
new = old + val;
|
||||
} while(__kuser_cmpxchg(old, new, ptr));
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
Notes:
|
||||
|
||||
- This routine already includes memory barriers as needed.
|
||||
|
||||
- Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
|
||||
|
||||
kuser_memory_barrier
|
||||
--------------------
|
||||
|
||||
Location: 0xffff0fa0
|
||||
|
||||
Reference prototype:
|
||||
|
||||
void __kuser_memory_barrier(void);
|
||||
|
||||
Input:
|
||||
|
||||
lr = return address
|
||||
|
||||
Output:
|
||||
|
||||
none
|
||||
|
||||
Clobbered registers:
|
||||
|
||||
none
|
||||
|
||||
Definition:
|
||||
|
||||
Apply any needed memory barrier to preserve consistency with data modified
|
||||
manually and __kuser_cmpxchg usage.
|
||||
|
||||
Usage example:
|
||||
|
||||
typedef void (__kuser_dmb_t)(void);
|
||||
#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
|
||||
|
||||
Notes:
|
||||
|
||||
- Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
|
||||
|
||||
kuser_cmpxchg64
|
||||
---------------
|
||||
|
||||
Location: 0xffff0f60
|
||||
|
||||
Reference prototype:
|
||||
|
||||
int __kuser_cmpxchg64(const int64_t *oldval,
|
||||
const int64_t *newval,
|
||||
volatile int64_t *ptr);
|
||||
|
||||
Input:
|
||||
|
||||
r0 = pointer to oldval
|
||||
r1 = pointer to newval
|
||||
r2 = pointer to target value
|
||||
lr = return address
|
||||
|
||||
Output:
|
||||
|
||||
r0 = success code (zero or non-zero)
|
||||
C flag = set if r0 == 0, clear if r0 != 0
|
||||
|
||||
Clobbered registers:
|
||||
|
||||
r3, lr, flags
|
||||
|
||||
Definition:
|
||||
|
||||
Atomically store the 64-bit value pointed by *newval in *ptr only if *ptr
|
||||
is equal to the 64-bit value pointed by *oldval. Return zero if *ptr was
|
||||
changed or non-zero if no exchange happened.
|
||||
|
||||
The C flag is also set if *ptr was changed to allow for assembly
|
||||
optimization in the calling code.
|
||||
|
||||
Usage example:
|
||||
|
||||
typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
|
||||
const int64_t *newval,
|
||||
volatile int64_t *ptr);
|
||||
#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
|
||||
|
||||
int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
|
||||
{
|
||||
int64_t old, new;
|
||||
|
||||
do {
|
||||
old = *ptr;
|
||||
new = old + val;
|
||||
} while(__kuser_cmpxchg64(&old, &new, ptr));
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
Notes:
|
||||
|
||||
- This routine already includes memory barriers as needed.
|
||||
|
||||
- Due to the length of this sequence, this spans 2 conventional kuser
|
||||
"slots", therefore 0xffff0f80 is not used as a valid entry point.
|
||||
|
||||
- Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
|
|
@ -9,6 +9,8 @@ the entire SPI transfer. - And not just bits_per_word duration.
|
|||
In most cases you can utilize SPI MODE_3 instead of MODE_0 to work-around this
|
||||
behavior. If your SPI slave device in question requires SPI MODE_0 or MODE_2
|
||||
timing, you can utilize the GPIO controlled SPI Slave Select option instead.
|
||||
In this case, you should use GPIO based CS for all of your slaves and not just
|
||||
the ones using mode 0 or 2 in order to guarantee correct CS toggling behavior.
|
||||
|
||||
You can even use the same pin whose peripheral role is a SSEL,
|
||||
but use it as a GPIO instead.
|
||||
|
|
|
@ -45,9 +45,13 @@ device.
|
|||
|
||||
rq_affinity (RW)
|
||||
----------------
|
||||
If this option is enabled, the block layer will migrate request completions
|
||||
to the CPU that originally submitted the request. For some workloads
|
||||
this provides a significant reduction in CPU cycles due to caching effects.
|
||||
If this option is '1', the block layer will migrate request completions to the
|
||||
cpu "group" that originally submitted the request. For some workloads this
|
||||
provides a significant reduction in CPU cycles due to caching effects.
|
||||
|
||||
For storage configurations that need to maximize distribution of completion
|
||||
processing setting this option to '2' forces the completion to run on the
|
||||
requesting cpu (bypassing the "group" aggregation logic).
|
||||
|
||||
scheduler (RW)
|
||||
--------------
|
||||
|
|
|
@ -214,7 +214,7 @@ replacing "/usr/src" with wherever you keep your Linux kernel source tree:
|
|||
make config
|
||||
make bzImage (or zImage)
|
||||
|
||||
Then install "arch/i386/boot/bzImage" or "arch/i386/boot/zImage" as your
|
||||
Then install "arch/x86/boot/bzImage" or "arch/x86/boot/zImage" as your
|
||||
standard kernel, run lilo if appropriate, and reboot.
|
||||
|
||||
To create the necessary devices in /dev, the "make_rd" script included in
|
||||
|
|
|
@ -64,9 +64,9 @@ the RAM disk dynamically grows as data is being written into it, a size field
|
|||
is not required. Bits 11 to 13 are not currently used and may as well be zero.
|
||||
These numbers are no magical secrets, as seen below:
|
||||
|
||||
./arch/i386/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF
|
||||
./arch/i386/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000
|
||||
./arch/i386/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000
|
||||
./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF
|
||||
./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000
|
||||
./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000
|
||||
|
||||
Consider a typical two floppy disk setup, where you will have the
|
||||
kernel on disk one, and have already put a RAM disk image onto disk #2.
|
||||
|
@ -85,7 +85,7 @@ The command line equivalent is: "prompt_ramdisk=1"
|
|||
Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word.
|
||||
So to create disk one of the set, you would do:
|
||||
|
||||
/usr/src/linux# cat arch/i386/boot/zImage > /dev/fd0
|
||||
/usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0
|
||||
/usr/src/linux# rdev /dev/fd0 /dev/fd0
|
||||
/usr/src/linux# rdev -r /dev/fd0 49152
|
||||
|
||||
|
|
|
@ -77,7 +77,7 @@ Throttling/Upper Limit policy
|
|||
- Specify a bandwidth rate on particular device for root group. The format
|
||||
for policy is "<major>:<minor> <bytes_per_second>".
|
||||
|
||||
echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.read_bps_device
|
||||
echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
|
||||
|
||||
Above will put a limit of 1MB/second on reads happening for root group
|
||||
on device having major/minor number 8:16.
|
||||
|
@ -90,7 +90,7 @@ Throttling/Upper Limit policy
|
|||
1024+0 records out
|
||||
4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
|
||||
|
||||
Limits for writes can be put using blkio.write_bps_device file.
|
||||
Limits for writes can be put using blkio.throttle.write_bps_device file.
|
||||
|
||||
Hierarchical Cgroups
|
||||
====================
|
||||
|
@ -286,28 +286,28 @@ Throttling/Upper limit policy files
|
|||
specified in bytes per second. Rules are per device. Following is
|
||||
the format.
|
||||
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.read_bps_device
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
|
||||
|
||||
- blkio.throttle.write_bps_device
|
||||
- Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
specified in bytes per second. Rules are per device. Following is
|
||||
the format.
|
||||
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.write_bps_device
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
|
||||
|
||||
- blkio.throttle.read_iops_device
|
||||
- Specifies upper limit on READ rate from the device. IO rate is
|
||||
specified in IO per second. Rules are per device. Following is
|
||||
the format.
|
||||
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.read_iops_device
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
|
||||
|
||||
- blkio.throttle.write_iops_device
|
||||
- Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
specified in IO per second. Rules are per device. Following is
|
||||
the format.
|
||||
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.write_iops_device
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
|
||||
|
||||
Note: If both BW and IOPS rules are specified for a device, then IO is
|
||||
subjected to both the constraints.
|
||||
|
|
|
@ -23,7 +23,7 @@ New accounting groups can be created under the parent group /sys/fs/cgroup.
|
|||
|
||||
# cd /sys/fs/cgroup
|
||||
# mkdir g1
|
||||
# echo $$ > g1
|
||||
# echo $$ > g1/tasks
|
||||
|
||||
The above steps create a new group g1 and move the current shell
|
||||
process (bash) into it. CPU time consumed by this bash and its children
|
||||
|
|
|
@ -180,7 +180,7 @@ files describing that cpuset:
|
|||
- cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
|
||||
- cpuset.sched_relax_domain_level: the searching range when migrating tasks
|
||||
|
||||
In addition, the root cpuset only has the following file:
|
||||
In addition, only the root cpuset has the following file:
|
||||
- cpuset.memory_pressure_enabled flag: compute memory_pressure?
|
||||
|
||||
New cpusets are created using the mkdir system call or shell
|
||||
|
|
|
@ -168,7 +168,7 @@ in-chipset dynamic frequency switching to policy->min, the upper limit
|
|||
to policy->max, and -if supported- select a performance-oriented
|
||||
setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a
|
||||
powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check
|
||||
the reference implementation in arch/i386/kernel/cpu/cpufreq/longrun.c
|
||||
the reference implementation in drivers/cpufreq/longrun.c
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -244,7 +244,7 @@ testing purposes. In particular, you should turn on:
|
|||
- DEBUG_SLAB can find a variety of memory allocation and use errors; it
|
||||
should be used on most development kernels.
|
||||
|
||||
- DEBUG_SPINLOCK, DEBUG_SPINLOCK_SLEEP, and DEBUG_MUTEXES will find a
|
||||
- DEBUG_SPINLOCK, DEBUG_ATOMIC_SLEEP, and DEBUG_MUTEXES will find a
|
||||
number of common locking errors.
|
||||
|
||||
There are quite a few other debugging options, some of which will be
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
* ARM Performance Monitor Units
|
||||
|
||||
ARM cores often have a PMU for counting cpu and cache events like cache misses
|
||||
and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU
|
||||
representation in the device tree should be done as under:-
|
||||
|
||||
Required properties:
|
||||
|
||||
- compatible : should be one of
|
||||
"arm,cortex-a9-pmu"
|
||||
"arm,cortex-a8-pmu"
|
||||
"arm,arm1176-pmu"
|
||||
"arm,arm1136-pmu"
|
||||
- interrupts : 1 combined interrupt or 1 per core.
|
||||
|
||||
Example:
|
||||
|
||||
pmu {
|
||||
compatible = "arm,cortex-a9-pmu";
|
||||
interrupts = <100 101>;
|
||||
};
|
|
@ -0,0 +1,21 @@
|
|||
* ARM Primecell Peripherals
|
||||
|
||||
ARM, Ltd. Primecell peripherals have a standard id register that can be used to
|
||||
identify the peripheral type, vendor, and revision. This value can be used for
|
||||
driver matching.
|
||||
|
||||
Required properties:
|
||||
|
||||
- compatible : should be a specific value for peripheral and "arm,primecell"
|
||||
|
||||
Optional properties:
|
||||
|
||||
- arm,primecell-periphid : Value to override the h/w value with
|
||||
|
||||
Example:
|
||||
|
||||
serial@fff36000 {
|
||||
compatible = "arm,pl011", "arm,primecell";
|
||||
arm,primecell-periphid = <0x00341011>;
|
||||
};
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
Freescale SoC SEC Security Engines
|
||||
Freescale SoC SEC Security Engines versions 2.x-3.x
|
||||
|
||||
Required properties:
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
* Freescale i.MX/MXC GPIO controller
|
||||
|
||||
Required properties:
|
||||
- compatible : Should be "fsl,<soc>-gpio"
|
||||
- reg : Address and length of the register set for the device
|
||||
- interrupts : Should be the port interrupt shared by all 32 pins, if
|
||||
one number. If two numbers, the first one is the interrupt shared
|
||||
by low 16 pins and the second one is for high 16 pins.
|
||||
- gpio-controller : Marks the device node as a gpio controller.
|
||||
- #gpio-cells : Should be two. The first cell is the pin number and
|
||||
the second cell is used to specify optional parameters (currently
|
||||
unused).
|
||||
|
||||
Example:
|
||||
|
||||
gpio0: gpio@73f84000 {
|
||||
compatible = "fsl,imx51-gpio", "fsl,imx31-gpio";
|
||||
reg = <0x73f84000 0x4000>;
|
||||
interrupts = <50 51>;
|
||||
gpio-controller;
|
||||
#gpio-cells = <2>;
|
||||
};
|
|
@ -4,17 +4,45 @@ Specifying GPIO information for devices
|
|||
1) gpios property
|
||||
-----------------
|
||||
|
||||
Nodes that makes use of GPIOs should define them using `gpios' property,
|
||||
format of which is: <&gpio-controller1-phandle gpio1-specifier
|
||||
&gpio-controller2-phandle gpio2-specifier
|
||||
0 /* holes are permitted, means no GPIO 3 */
|
||||
&gpio-controller4-phandle gpio4-specifier
|
||||
...>;
|
||||
Nodes that make use of GPIOs should specify them using one or more
|
||||
properties, each containing a 'gpio-list':
|
||||
|
||||
Note that gpio-specifier length is controller dependent.
|
||||
gpio-list ::= <single-gpio> [gpio-list]
|
||||
single-gpio ::= <gpio-phandle> <gpio-specifier>
|
||||
gpio-phandle : phandle to gpio controller node
|
||||
gpio-specifier : Array of #gpio-cells specifying specific gpio
|
||||
(controller specific)
|
||||
|
||||
GPIO properties should be named "[<name>-]gpios". Exact
|
||||
meaning of each gpios property must be documented in the device tree
|
||||
binding for each device.
|
||||
|
||||
For example, the following could be used to describe gpios pins to use
|
||||
as chip select lines; with chip selects 0, 1 and 3 populated, and chip
|
||||
select 2 left empty:
|
||||
|
||||
gpio1: gpio1 {
|
||||
gpio-controller;
|
||||
#gpio-cells = <2>;
|
||||
};
|
||||
gpio2: gpio2 {
|
||||
gpio-controller;
|
||||
#gpio-cells = <1>;
|
||||
};
|
||||
[...]
|
||||
chipsel-gpios = <&gpio1 12 0>,
|
||||
<&gpio1 13 0>,
|
||||
<0>, /* holes are permitted, means no GPIO 2 */
|
||||
<&gpio2 2>;
|
||||
|
||||
Note that gpio-specifier length is controller dependent. In the
|
||||
above example, &gpio1 uses 2 cells to specify a gpio, while &gpio2
|
||||
only uses one.
|
||||
|
||||
gpio-specifier may encode: bank, pin position inside the bank,
|
||||
whether pin is open-drain and whether pin is logically inverted.
|
||||
Exact meaning of each specifier cell is controller specific, and must
|
||||
be documented in the device tree binding for the device.
|
||||
|
||||
Example of the node using GPIOs:
|
||||
|
||||
|
@ -28,8 +56,8 @@ and empty GPIO flags as accepted by the "qe_pio_e" gpio-controller.
|
|||
2) gpio-controller nodes
|
||||
------------------------
|
||||
|
||||
Every GPIO controller node must have #gpio-cells property defined,
|
||||
this information will be used to translate gpio-specifiers.
|
||||
Every GPIO controller node must have both an empty "gpio-controller"
|
||||
property, and have #gpio-cells contain the size of the gpio-specifier.
|
||||
|
||||
Example of two SOC GPIO banks defined as gpio-controller nodes:
|
||||
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
NVIDIA Tegra 2 GPIO controller
|
||||
|
||||
Required properties:
|
||||
- compatible : "nvidia,tegra20-gpio"
|
||||
- #gpio-cells : Should be two. The first cell is the pin number and the
|
||||
second cell is used to specify optional parameters:
|
||||
- bit 0 specifies polarity (0 for normal, 1 for inverted)
|
||||
- gpio-controller : Marks the device node as a GPIO controller.
|
|
@ -0,0 +1,22 @@
|
|||
* Freescale (Enhanced) Configurable Serial Peripheral Interface
|
||||
(CSPI/eCSPI) for i.MX
|
||||
|
||||
Required properties:
|
||||
- compatible : Should be "fsl,<soc>-cspi" or "fsl,<soc>-ecspi"
|
||||
- reg : Offset and length of the register set for the device
|
||||
- interrupts : Should contain CSPI/eCSPI interrupt
|
||||
- fsl,spi-num-chipselects : Contains the number of the chipselect
|
||||
- cs-gpios : Specifies the gpio pins to be used for chipselects.
|
||||
|
||||
Example:
|
||||
|
||||
ecspi@70010000 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
compatible = "fsl,imx51-ecspi";
|
||||
reg = <0x70010000 0x4000>;
|
||||
interrupts = <36>;
|
||||
fsl,spi-num-chipselects = <2>;
|
||||
cs-gpios = <&gpio3 24 0>, /* GPIO4_24 */
|
||||
<&gpio3 25 0>; /* GPIO4_25 */
|
||||
};
|
|
@ -0,0 +1,5 @@
|
|||
NVIDIA Tegra 2 SPI device
|
||||
|
||||
Required properties:
|
||||
- compatible : should be "nvidia,tegra20-spi".
|
||||
- gpios : should specify GPIOs used for chipselect.
|
|
@ -0,0 +1,36 @@
|
|||
* UART (Universal Asynchronous Receiver/Transmitter)
|
||||
|
||||
Required properties:
|
||||
- compatible : one of:
|
||||
- "ns8250"
|
||||
- "ns16450"
|
||||
- "ns16550a"
|
||||
- "ns16550"
|
||||
- "ns16750"
|
||||
- "ns16850"
|
||||
- "nvidia,tegra20-uart"
|
||||
- "ibm,qpace-nwp-serial"
|
||||
- "serial" if the port type is unknown.
|
||||
- reg : offset and length of the register set for the device.
|
||||
- interrupts : should contain uart interrupt.
|
||||
- clock-frequency : the input clock frequency for the UART.
|
||||
|
||||
Optional properties:
|
||||
- current-speed : the current active speed of the UART.
|
||||
- reg-offset : offset to apply to the mapbase from the start of the registers.
|
||||
- reg-shift : quantity to shift the register offsets by.
|
||||
- reg-io-width : the size (in bytes) of the IO accesses that should be
|
||||
performed on the device. There are some systems that require 32-bit
|
||||
accesses to the UART (e.g. TI davinci).
|
||||
- used-by-rtas : set to indicate that the port is in use by the OpenFirmware
|
||||
RTAS and should not be registered.
|
||||
|
||||
Example:
|
||||
|
||||
uart@80230000 {
|
||||
compatible = "ns8250";
|
||||
reg = <0x80230000 0x100>;
|
||||
clock-frequency = <3686400>;
|
||||
interrupts = <10>;
|
||||
reg-shift = <2>;
|
||||
};
|
|
@ -184,7 +184,7 @@ Why: /proc/<pid>/oom_adj allows userspace to influence the oom killer's
|
|||
|
||||
A much more powerful interface, /proc/<pid>/oom_score_adj, was
|
||||
introduced with the oom killer rewrite that allows users to increase or
|
||||
decrease the badness() score linearly. This interface will replace
|
||||
decrease the badness score linearly. This interface will replace
|
||||
/proc/<pid>/oom_adj.
|
||||
|
||||
A warning will be emitted to the kernel log if an application uses this
|
||||
|
@ -193,6 +193,20 @@ Why: /proc/<pid>/oom_adj allows userspace to influence the oom killer's
|
|||
|
||||
---------------------------
|
||||
|
||||
What: CS5535/CS5536 obsolete GPIO driver
|
||||
When: June 2011
|
||||
Files: drivers/staging/cs5535_gpio/*
|
||||
Check: drivers/staging/cs5535_gpio/cs5535_gpio.c
|
||||
Why: A newer driver replaces this; it is drivers/gpio/cs5535-gpio.c, and
|
||||
integrates with the Linux GPIO subsystem. The old driver has been
|
||||
moved to staging, and will be removed altogether around 3.0.
|
||||
Please test the new driver, and ensure that the functionality you
|
||||
need and any bugfixes from the old driver are available in the new
|
||||
one.
|
||||
Who: Andres Salomon <dilinger@queued.net>
|
||||
|
||||
--------------------------
|
||||
|
||||
What: remove EXPORT_SYMBOL(kernel_thread)
|
||||
When: August 2006
|
||||
Files: arch/*/kernel/*_ksyms.c
|
||||
|
@ -280,7 +294,7 @@ When: The schedule was July 2008, but it was decided that we are going to keep t
|
|||
Why: The support code for the old firmware hurts code readability/maintainability
|
||||
and slightly hurts runtime performance. Bugfixes for the old firmware
|
||||
are not provided by Broadcom anymore.
|
||||
Who: Michael Buesch <mb@bu3sch.de>
|
||||
Who: Michael Buesch <m@bues.ch>
|
||||
|
||||
---------------------------
|
||||
|
||||
|
@ -416,7 +430,7 @@ Who: Avi Kivity <avi@redhat.com>
|
|||
----------------------------
|
||||
|
||||
What: iwlwifi 50XX module parameters
|
||||
When: 2.6.40
|
||||
When: 3.0
|
||||
Why: The "..50" module parameters were used to configure 5000 series and
|
||||
up devices; different set of module parameters also available for 4965
|
||||
with same functionalities. Consolidate both set into single place
|
||||
|
@ -427,7 +441,7 @@ Who: Wey-Yi Guy <wey-yi.w.guy@intel.com>
|
|||
----------------------------
|
||||
|
||||
What: iwl4965 alias support
|
||||
When: 2.6.40
|
||||
When: 3.0
|
||||
Why: Internal alias support has been present in module-init-tools for some
|
||||
time, the MODULE_ALIAS("iwl4965") boilerplate aliases can be removed
|
||||
with no impact.
|
||||
|
@ -468,7 +482,7 @@ Who: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
|
|||
----------------------------
|
||||
|
||||
What: iwlwifi disable_hw_scan module parameters
|
||||
When: 2.6.40
|
||||
When: 3.0
|
||||
Why: Hardware scan is the preferred method for iwlwifi devices for
|
||||
scanning operation. Remove software scan support for all the
|
||||
iwlwifi devices.
|
||||
|
@ -479,7 +493,7 @@ Who: Wey-Yi Guy <wey-yi.w.guy@intel.com>
|
|||
|
||||
What: access to nfsd auth cache through sys_nfsservctl or '.' files
|
||||
in the 'nfsd' filesystem.
|
||||
When: 2.6.40
|
||||
When: 3.0
|
||||
Why: This is a legacy interface which has been replaced by a more
|
||||
dynamic cache. Continuing to maintain this interface is an
|
||||
unnecessary burden.
|
||||
|
@ -487,16 +501,6 @@ Who: NeilBrown <neilb@suse.de>
|
|||
|
||||
----------------------------
|
||||
|
||||
What: cancel_rearming_delayed_work[queue]()
|
||||
When: 2.6.39
|
||||
|
||||
Why: The functions have been superceded by cancel_delayed_work_sync()
|
||||
quite some time ago. The conversion is trivial and there is no
|
||||
in-kernel user left.
|
||||
Who: Tejun Heo <tj@kernel.org>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: Legacy, non-standard chassis intrusion detection interface.
|
||||
When: June 2011
|
||||
Why: The adm9240, w83792d and w83793 hardware monitoring drivers have
|
||||
|
@ -514,22 +518,6 @@ Files: net/netfilter/xt_connlimit.c
|
|||
|
||||
----------------------------
|
||||
|
||||
What: noswapaccount kernel command line parameter
|
||||
When: 2.6.40
|
||||
Why: The original implementation of memsw feature enabled by
|
||||
CONFIG_CGROUP_MEM_RES_CTLR_SWAP could be disabled by the noswapaccount
|
||||
kernel parameter (introduced in 2.6.29-rc1). Later on, this decision
|
||||
turned out to be not ideal because we cannot have the feature compiled
|
||||
in and disabled by default and let only interested to enable it
|
||||
(e.g. general distribution kernels might need it). Therefore we have
|
||||
added swapaccount[=0|1] parameter (introduced in 2.6.37) which provides
|
||||
the both possibilities. If we remove noswapaccount we will have
|
||||
less command line parameters with the same functionality and we
|
||||
can also cleanup the parameter handling a bit ().
|
||||
Who: Michal Hocko <mhocko@suse.cz>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: ipt_addrtype match include file
|
||||
When: 2012
|
||||
Why: superseded by xt_addrtype
|
||||
|
@ -548,7 +536,7 @@ Who: Jean Delvare <khali@linux-fr.org>
|
|||
----------------------------
|
||||
|
||||
What: Support for UVCIOC_CTRL_ADD in the uvcvideo driver
|
||||
When: 2.6.42
|
||||
When: 3.2
|
||||
Why: The information passed to the driver by this ioctl is now queried
|
||||
dynamically from the device.
|
||||
Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
|
||||
|
@ -556,7 +544,7 @@ Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
|
|||
----------------------------
|
||||
|
||||
What: Support for UVCIOC_CTRL_MAP_OLD in the uvcvideo driver
|
||||
When: 2.6.42
|
||||
When: 3.2
|
||||
Why: Used only by applications compiled against older driver versions.
|
||||
Superseded by UVCIOC_CTRL_MAP which supports V4L2 menu controls.
|
||||
Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
|
||||
|
@ -564,8 +552,30 @@ Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
|
|||
----------------------------
|
||||
|
||||
What: Support for UVCIOC_CTRL_GET and UVCIOC_CTRL_SET in the uvcvideo driver
|
||||
When: 2.6.42
|
||||
When: 3.2
|
||||
Why: Superseded by the UVCIOC_CTRL_QUERY ioctl.
|
||||
Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: For VIDIOC_S_FREQUENCY the type field must match the device node's type.
|
||||
If not, return -EINVAL.
|
||||
When: 3.2
|
||||
Why: It makes no sense to switch the tuner to radio mode by calling
|
||||
VIDIOC_S_FREQUENCY on a video node, or to switch the tuner to tv mode by
|
||||
calling VIDIOC_S_FREQUENCY on a radio node. This is the first step of a
|
||||
move to more consistent handling of tv and radio tuners.
|
||||
Who: Hans Verkuil <hans.verkuil@cisco.com>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: Opening a radio device node will no longer automatically switch the
|
||||
tuner mode from tv to radio.
|
||||
When: 3.3
|
||||
Why: Just opening a V4L device should not change the state of the hardware
|
||||
like that. It's very unexpected and against the V4L spec. Instead, you
|
||||
switch to radio mode by calling VIDIOC_S_FREQUENCY. This is the second
|
||||
and last step of the move to consistent handling of tv and radio tuners.
|
||||
Who: Hans Verkuil <hans.verkuil@cisco.com>
|
||||
|
||||
----------------------------
|
||||
|
|
|
@ -52,7 +52,7 @@ ata *);
|
|||
void (*put_link) (struct dentry *, struct nameidata *, void *);
|
||||
void (*truncate) (struct inode *);
|
||||
int (*permission) (struct inode *, int, unsigned int);
|
||||
int (*check_acl)(struct inode *, int, unsigned int);
|
||||
int (*get_acl)(struct inode *, int);
|
||||
int (*setattr) (struct dentry *, struct iattr *);
|
||||
int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
|
||||
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
|
||||
|
@ -80,7 +80,7 @@ put_link: no
|
|||
truncate: yes (see below)
|
||||
setattr: yes
|
||||
permission: no (may not block if called in rcu-walk mode)
|
||||
check_acl: no
|
||||
get_acl: no
|
||||
getattr: no
|
||||
setxattr: yes
|
||||
getxattr: no
|
||||
|
@ -412,7 +412,7 @@ prototypes:
|
|||
int (*open) (struct inode *, struct file *);
|
||||
int (*flush) (struct file *);
|
||||
int (*release) (struct inode *, struct file *);
|
||||
int (*fsync) (struct file *, int datasync);
|
||||
int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
|
||||
int (*aio_fsync) (struct kiocb *, int datasync);
|
||||
int (*fasync) (int, struct file *, int);
|
||||
int (*lock) (struct file *, int, struct file_lock *);
|
||||
|
@ -438,9 +438,7 @@ prototypes:
|
|||
|
||||
locking rules:
|
||||
All may block except for ->setlease.
|
||||
No VFS locks held on entry except for ->fsync and ->setlease.
|
||||
|
||||
->fsync() has i_mutex on inode.
|
||||
No VFS locks held on entry except for ->setlease.
|
||||
|
||||
->setlease has the file_list_lock held and must not sleep.
|
||||
|
||||
|
|
|
@ -673,6 +673,22 @@ storage request to complete, or it may attempt to cancel the storage request -
|
|||
in which case the page will not be stored in the cache this time.
|
||||
|
||||
|
||||
BULK INODE PAGE UNCACHE
|
||||
-----------------------
|
||||
|
||||
A convenience routine is provided to perform an uncache on all the pages
|
||||
attached to an inode. This assumes that the pages on the inode correspond on a
|
||||
1:1 basis with the pages in the cache.
|
||||
|
||||
void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
|
||||
struct inode *inode);
|
||||
|
||||
This takes the netfs cookie that the pages were cached with and the inode that
|
||||
the pages are attached to. This function will wait for pages to finish being
|
||||
written to the cache and for the cache to finish with the page generally. No
|
||||
error is returned.
|
||||
|
||||
|
||||
==========================
|
||||
INDEX AND DATA FILE UPDATE
|
||||
==========================
|
||||
|
|
|
@ -226,7 +226,7 @@ They depend on various facilities being available:
|
|||
cdrecord.
|
||||
|
||||
e.g.
|
||||
cdrecord dev=ATAPI:1,0,0 arch/i386/boot/image.iso
|
||||
cdrecord dev=ATAPI:1,0,0 arch/x86/boot/image.iso
|
||||
|
||||
For more information on isolinux, including how to create bootdisks
|
||||
for prebuilt kernels, see http://syslinux.zytor.com/
|
||||
|
|
|
@ -40,7 +40,6 @@ Features which NILFS2 does not support yet:
|
|||
- POSIX ACLs
|
||||
- quotas
|
||||
- fsck
|
||||
- resize
|
||||
- defragmentation
|
||||
|
||||
Mount options
|
||||
|
|
|
@ -398,12 +398,34 @@ Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set,
|
|||
so the i_size should not change when hole punching, even when punching the end of
|
||||
a file off.
|
||||
|
||||
--
|
||||
[mandatory]
|
||||
|
||||
--
|
||||
[mandatory]
|
||||
->get_sb() is gone. Switch to use of ->mount(). Typically it's just
|
||||
a matter of switching from calling get_sb_... to mount_... and changing the
|
||||
function type. If you were doing it manually, just switch from setting ->mnt_root
|
||||
to some pointer to returning that pointer. On errors return ERR_PTR(...).
|
||||
|
||||
--
|
||||
[mandatory]
|
||||
->permission() and generic_permission() have lost flags
|
||||
argument; instead of passing IPERM_FLAG_RCU we add MAY_NOT_BLOCK into mask.
|
||||
generic_permission() has also lost the check_acl argument; ACL checking
|
||||
has been taken to VFS and filesystems need to provide a non-NULL ->i_op->get_acl
|
||||
to read an ACL from disk.
|
||||
|
||||
--
|
||||
[mandatory]
|
||||
If you implement your own ->llseek() you must handle SEEK_HOLE and
|
||||
SEEK_DATA. You can handle this by returning -EINVAL, but it would be nicer to
|
||||
support it in some way. The generic handler assumes that the entire file is
|
||||
data and there is a virtual hole at the end of the file. So if the provided
|
||||
offset is less than i_size and SEEK_DATA is specified, return the same offset.
|
||||
If the above is true for the offset and you are given SEEK_HOLE, return the end
|
||||
of the file. If the offset is i_size or greater return -ENXIO in either case.
|
||||
|
||||
[mandatory]
|
||||
If you have your own ->fsync() you must make sure to call
|
||||
filemap_write_and_wait_range() so that all dirty pages are synced out properly.
|
||||
You must also keep in mind that ->fsync() is not called with i_mutex held
|
||||
anymore, so if you require i_mutex locking you must make sure to take it and
|
||||
release it yourself.
|
||||
|
|
|
@ -111,34 +111,6 @@ The following is an example of the kernel boot arguments to attach mtd0
|
|||
to UBI and mount volume "rootfs":
|
||||
ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs
|
||||
|
||||
|
||||
Module Parameters for Debugging
|
||||
===============================
|
||||
|
||||
When UBIFS has been compiled with debugging enabled, there are 2 module
|
||||
parameters that are available to control aspects of testing and debugging.
|
||||
|
||||
debug_chks Selects extra checks that UBIFS can do while running:
|
||||
|
||||
Check Flag value
|
||||
|
||||
General checks 1
|
||||
Check Tree Node Cache (TNC) 2
|
||||
Check indexing tree size 4
|
||||
Check orphan area 8
|
||||
Check old indexing tree 16
|
||||
Check LEB properties (lprops) 32
|
||||
Check leaf nodes and inodes 64
|
||||
|
||||
debug_tsts Selects a mode of testing, as follows:
|
||||
|
||||
Test mode Flag value
|
||||
|
||||
Failure mode for recovery testing 4
|
||||
|
||||
For example, set debug_chks to 3 to enable general and TNC checks.
|
||||
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
|
|
|
@ -229,6 +229,8 @@ struct super_operations {
|
|||
|
||||
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
|
||||
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
|
||||
int (*nr_cached_objects)(struct super_block *);
|
||||
void (*free_cached_objects)(struct super_block *, int);
|
||||
};
|
||||
|
||||
All methods are called without any locks being held, unless otherwise
|
||||
|
@ -301,6 +303,26 @@ or bottom half).
|
|||
|
||||
quota_write: called by the VFS to write to filesystem quota file.
|
||||
|
||||
nr_cached_objects: called by the sb cache shrinking function for the
|
||||
filesystem to return the number of freeable cached objects it contains.
|
||||
Optional.
|
||||
|
||||
free_cached_objects: called by the sb cache shrinking function for the
|
||||
filesystem to scan the number of objects indicated to try to free them.
|
||||
Optional, but any filesystem implementing this method needs to also
|
||||
implement ->nr_cached_objects for it to be called correctly.
|
||||
|
||||
We can't do anything with any errors that the filesystem might
|
||||
have encountered, hence the void return type. This will never be called if
|
||||
the VM is trying to reclaim under GFP_NOFS conditions, hence this
|
||||
method does not need to handle that situation itself.
|
||||
|
||||
Implementations must include conditional reschedule calls inside any
|
||||
scanning loop that is done. This allows the VFS to determine
|
||||
appropriate scan batch sizes without having to worry about whether
|
||||
implementations will cause holdoff problems due to large scan batch
|
||||
sizes.
|
||||
|
||||
Whoever sets up the inode is responsible for filling in the "i_op" field. This
|
||||
is a pointer to a "struct inode_operations" which describes the methods that
|
||||
can be performed on individual inodes.
|
||||
|
@ -333,8 +355,8 @@ struct inode_operations {
|
|||
void * (*follow_link) (struct dentry *, struct nameidata *);
|
||||
void (*put_link) (struct dentry *, struct nameidata *, void *);
|
||||
void (*truncate) (struct inode *);
|
||||
int (*permission) (struct inode *, int, unsigned int);
|
||||
int (*check_acl)(struct inode *, int, unsigned int);
|
||||
int (*permission) (struct inode *, int);
|
||||
int (*get_acl)(struct inode *, int);
|
||||
int (*setattr) (struct dentry *, struct iattr *);
|
||||
int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
|
||||
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
|
||||
|
@ -423,7 +445,7 @@ otherwise noted.
|
|||
permission: called by the VFS to check for access rights on a POSIX-like
|
||||
filesystem.
|
||||
|
||||
May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk
|
||||
May be called in rcu-walk mode (mask & MAY_NOT_BLOCK). If in rcu-walk
|
||||
mode, the filesystem must check the permission without blocking or
|
||||
storing to the inode.
|
||||
|
||||
|
@ -755,7 +777,7 @@ struct file_operations {
|
|||
int (*open) (struct inode *, struct file *);
|
||||
int (*flush) (struct file *);
|
||||
int (*release) (struct inode *, struct file *);
|
||||
int (*fsync) (struct file *, int datasync);
|
||||
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
|
||||
int (*aio_fsync) (struct kiocb *, int datasync);
|
||||
int (*fasync) (int, struct file *, int);
|
||||
int (*lock) (struct file *, int, struct file_lock *);
|
||||
|
|
|
@ -76,7 +76,8 @@ IT8718F, IT8720F, IT8721F, IT8726F, IT8758E and SiS950 chips.
|
|||
These chips are 'Super I/O chips', supporting floppy disks, infrared ports,
|
||||
joysticks and other miscellaneous stuff. For hardware monitoring, they
|
||||
include an 'environment controller' with 3 temperature sensors, 3 fan
|
||||
rotation speed sensors, 8 voltage sensors, and associated alarms.
|
||||
rotation speed sensors, 8 voltage sensors, associated alarms, and chassis
|
||||
intrusion detection.
|
||||
|
||||
The IT8712F and IT8716F additionally feature VID inputs, used to report
|
||||
the Vcore voltage of the processor. The early IT8712F have 5 VID pins,
|
||||
|
|
|
@ -13,7 +13,8 @@ Supported chips:
|
|||
Datasheet: Publicly available at the National Semiconductor website
|
||||
http://www.national.com/
|
||||
|
||||
Author: Frodo Looijaard <frodol@dds.nl>
|
||||
Authors: Frodo Looijaard <frodol@dds.nl>
|
||||
Jean Delvare <khali@linux-fr.org>
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
Kernel driver sch5636
|
||||
=====================
|
||||
|
||||
Supported chips:
|
||||
* SMSC SCH5636
|
||||
Prefix: 'sch5636'
|
||||
Addresses scanned: none, address read from Super I/O config space
|
||||
|
||||
Author: Hans de Goede <hdegoede@redhat.com>
|
||||
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
SMSC SCH5636 Super I/O chips include an embedded microcontroller for
|
||||
hardware monitoring solutions, allowing motherboard manufacturers to create
|
||||
their own custom hwmon solution based upon the SCH5636.
|
||||
|
||||
Currently the sch5636 driver only supports the Fujitsu Theseus SCH5636 based
|
||||
hwmon solution. The sch5636 driver runs a sanity check on loading to ensure
|
||||
it is dealing with a Fujitsu Theseus and not with another custom SCH5636 based
|
||||
hwmon solution.
|
||||
|
||||
The Fujitsu Theseus can monitor up to 5 voltages, 8 fans and 16
|
||||
temperatures. Note that the driver detects how many fan headers /
|
||||
temperature sensors are actually implemented on the motherboard, so you will
|
||||
likely see fewer temperature and fan inputs.
|
||||
|
||||
An application note describing the Theseus' registers, as well as an
|
||||
application note describing the protocol for communicating with the
|
||||
microcontroller is available upon request. Please mail me if you want a copy.
|
|
@ -110,7 +110,7 @@ V. Getting Logical Configuration Table
|
|||
ENOBUFS Buffer not large enough. If this occurs, the required
|
||||
buffer length is written into *(lct->reslen)
|
||||
|
||||
VI. Settting Parameters
|
||||
VI. Setting Parameters
|
||||
|
||||
SYNOPSIS
|
||||
|
||||
|
|
|
@ -506,7 +506,7 @@ to e.g. the Internet:
|
|||
<ISDN subsystem - ISDN support -- HiSax>
|
||||
make clean; make zImage; make modules; make modules_install
|
||||
2. Install the new kernel
|
||||
cp /usr/src/linux/arch/i386/boot/zImage /etc/kernel/linux.isdn
|
||||
cp /usr/src/linux/arch/x86/boot/zImage /etc/kernel/linux.isdn
|
||||
vi /etc/lilo.conf
|
||||
<add new kernel in the bootable image section>
|
||||
lilo
|
||||
|
|
|
@ -68,7 +68,7 @@ Linux カーネルパッチ投稿者向けチェックリスト
|
|||
|
||||
12: CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, CONFIG_DEBUG_SLAB,
|
||||
CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, CONFIG_DEBUG_SPINLOCK,
|
||||
CONFIG_DEBUG_SPINLOCK_SLEEP これら全てを同時に有効にして動作確認を
|
||||
CONFIG_DEBUG_ATOMIC_SLEEP これら全てを同時に有効にして動作確認を
|
||||
行ってください。
|
||||
|
||||
13: CONFIG_SMP, CONFIG_PREEMPT を有効にした場合と無効にした場合の両方で
|
||||
|
|
|
@ -441,7 +441,7 @@ more details, with real examples.
|
|||
specified if the first option is not supported.
|
||||
|
||||
Example:
|
||||
#arch/i386/kernel/Makefile
|
||||
#arch/x86/kernel/Makefile
|
||||
vsyscall-flags += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
|
||||
|
||||
In the above example, vsyscall-flags will be assigned the option
|
||||
|
@ -460,7 +460,7 @@ more details, with real examples.
|
|||
supported to use an optional second option.
|
||||
|
||||
Example:
|
||||
#arch/i386/Makefile
|
||||
#arch/x86/Makefile
|
||||
cflags-y += $(call cc-option,-march=pentium-mmx,-march=i586)
|
||||
|
||||
In the above example, cflags-y will be assigned the option
|
||||
|
@ -522,7 +522,7 @@ more details, with real examples.
|
|||
even though the option was accepted by gcc.
|
||||
|
||||
Example:
|
||||
#arch/i386/Makefile
|
||||
#arch/x86/Makefile
|
||||
cflags-y += $(shell \
|
||||
if [ $(call cc-version) -ge 0300 ] ; then \
|
||||
echo "-mregparm=3"; fi ;)
|
||||
|
@ -802,7 +802,7 @@ but in the architecture makefiles where the kbuild infrastructure
|
|||
is not sufficient this sometimes needs to be explicit.
|
||||
|
||||
Example:
|
||||
#arch/i386/boot/Makefile
|
||||
#arch/x86/boot/Makefile
|
||||
subdir- := compressed/
|
||||
|
||||
The above assignment instructs kbuild to descend down in the
|
||||
|
@ -812,12 +812,12 @@ To support the clean infrastructure in the Makefiles that builds the
|
|||
final bootimage there is an optional target named archclean:
|
||||
|
||||
Example:
|
||||
#arch/i386/Makefile
|
||||
#arch/x86/Makefile
|
||||
archclean:
|
||||
$(Q)$(MAKE) $(clean)=arch/i386/boot
|
||||
$(Q)$(MAKE) $(clean)=arch/x86/boot
|
||||
|
||||
When "make clean" is executed, make will descend down in arch/i386/boot,
|
||||
and clean as usual. The Makefile located in arch/i386/boot/ may use
|
||||
When "make clean" is executed, make will descend down in arch/x86/boot,
|
||||
and clean as usual. The Makefile located in arch/x86/boot/ may use
|
||||
the subdir- trick to descend further down.
|
||||
|
||||
Note 1: arch/$(ARCH)/Makefile cannot use "subdir-", because that file is
|
||||
|
@ -882,7 +882,7 @@ When kbuild executes, the following steps are followed (roughly):
|
|||
LDFLAGS_vmlinux uses the LDFLAGS_$@ support.
|
||||
|
||||
Example:
|
||||
#arch/i386/Makefile
|
||||
#arch/x86/Makefile
|
||||
LDFLAGS_vmlinux := -e stext
|
||||
|
||||
OBJCOPYFLAGS objcopy flags
|
||||
|
@ -920,14 +920,14 @@ When kbuild executes, the following steps are followed (roughly):
|
|||
Often, the KBUILD_CFLAGS variable depends on the configuration.
|
||||
|
||||
Example:
|
||||
#arch/i386/Makefile
|
||||
#arch/x86/Makefile
|
||||
cflags-$(CONFIG_M386) += -march=i386
|
||||
KBUILD_CFLAGS += $(cflags-y)
|
||||
|
||||
Many arch Makefiles dynamically run the target C compiler to
|
||||
probe supported options:
|
||||
|
||||
#arch/i386/Makefile
|
||||
#arch/x86/Makefile
|
||||
|
||||
...
|
||||
cflags-$(CONFIG_MPENTIUMII) += $(call cc-option,\
|
||||
|
@ -1038,8 +1038,8 @@ When kbuild executes, the following steps are followed (roughly):
|
|||
into the arch/$(ARCH)/boot/Makefile.
|
||||
|
||||
Example:
|
||||
#arch/i386/Makefile
|
||||
boot := arch/i386/boot
|
||||
#arch/x86/Makefile
|
||||
boot := arch/x86/boot
|
||||
bzImage: vmlinux
|
||||
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
|
||||
|
||||
|
@ -1051,7 +1051,7 @@ When kbuild executes, the following steps are followed (roughly):
|
|||
To support this, $(archhelp) must be defined.
|
||||
|
||||
Example:
|
||||
#arch/i386/Makefile
|
||||
#arch/x86/Makefile
|
||||
define archhelp
|
||||
echo '* bzImage - Image (arch/$(ARCH)/boot/bzImage)'
|
||||
endef
|
||||
|
@ -1065,7 +1065,7 @@ When kbuild executes, the following steps are followed (roughly):
|
|||
from vmlinux.
|
||||
|
||||
Example:
|
||||
#arch/i386/Makefile
|
||||
#arch/x86/Makefile
|
||||
all: bzImage
|
||||
|
||||
When "make" is executed without arguments, bzImage will be built.
|
||||
|
@ -1083,7 +1083,7 @@ When kbuild executes, the following steps are followed (roughly):
|
|||
2) kbuild knows what files to delete during "make clean"
|
||||
|
||||
Example:
|
||||
#arch/i386/kernel/Makefile
|
||||
#arch/x86/kernel/Makefile
|
||||
extra-y := head.o init_task.o
|
||||
|
||||
In this example, extra-y is used to list object files that
|
||||
|
@ -1133,7 +1133,7 @@ When kbuild executes, the following steps are followed (roughly):
|
|||
Compress target. Use maximum compression to compress target.
|
||||
|
||||
Example:
|
||||
#arch/i386/boot/Makefile
|
||||
#arch/x86/boot/Makefile
|
||||
LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary
|
||||
LDFLAGS_setup := -Ttext 0x0 -s --oformat binary -e begtext
|
||||
|
||||
|
@ -1193,7 +1193,7 @@ When kbuild executes, the following steps are followed (roughly):
|
|||
|
||||
When updating the $(obj)/bzImage target, the line
|
||||
|
||||
BUILD arch/i386/boot/bzImage
|
||||
BUILD arch/x86/boot/bzImage
|
||||
|
||||
will be displayed with "make KBUILD_VERBOSE=0".
|
||||
|
||||
|
@ -1207,7 +1207,7 @@ When kbuild executes, the following steps are followed (roughly):
|
|||
kbuild knows .lds files and includes a rule *lds.S -> *lds.
|
||||
|
||||
Example:
|
||||
#arch/i386/kernel/Makefile
|
||||
#arch/x86/kernel/Makefile
|
||||
always := vmlinux.lds
|
||||
|
||||
#Makefile
|
||||
|
|
|
@ -1159,10 +1159,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
for all guests.
|
||||
Default is 1 (enabled) if in 64bit or 32bit-PAE mode
|
||||
|
||||
kvm-intel.bypass_guest_pf=
|
||||
[KVM,Intel] Disables bypassing of guest page faults
|
||||
on Intel chips. Default is 1 (enabled)
|
||||
|
||||
kvm-intel.ept= [KVM,Intel] Disable extended page tables
|
||||
(virtualized MMU) support on capable Intel chips.
|
||||
Default is 1 (enabled)
|
||||
|
@ -1737,6 +1733,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
|
||||
fault handling.
|
||||
|
||||
no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting.
|
||||
Steal time is computed, but won't influence scheduler
|
||||
behaviour
|
||||
|
||||
nolapic [X86-32,APIC] Do not enable or use the local APIC.
|
||||
|
||||
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
|
||||
|
@ -2015,6 +2015,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
the default.
|
||||
off: Turn ECRC off
|
||||
on: Turn ECRC on.
|
||||
realloc reallocate PCI resources if allocations done by BIOS
|
||||
are erroneous.
|
||||
|
||||
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
|
||||
Management.
|
||||
|
|
|
@ -534,6 +534,8 @@ Events that are never propagated by the driver:
|
|||
0x2404 System is waking up from hibernation to undock
|
||||
0x2405 System is waking up from hibernation to eject bay
|
||||
0x5010 Brightness level changed/control event
|
||||
0x6000 KEYBOARD: Numlock key pressed
|
||||
0x6005 KEYBOARD: Fn key pressed (TO BE VERIFIED)
|
||||
|
||||
Events that are propagated by the driver to userspace:
|
||||
|
||||
|
@ -545,6 +547,8 @@ Events that are propagated by the driver to userspace:
|
|||
0x3006 Bay hotplug request (hint to power up SATA link when
|
||||
the optical drive tray is ejected)
|
||||
0x4003 Undocked (see 0x2x04), can sleep again
|
||||
0x4010 Docked into hotplug port replicator (non-ACPI dock)
|
||||
0x4011 Undocked from hotplug port replicator (non-ACPI dock)
|
||||
0x500B Tablet pen inserted into its storage bay
|
||||
0x500C Tablet pen removed from its storage bay
|
||||
0x6011 ALARM: battery is too hot
|
||||
|
@ -552,6 +556,7 @@ Events that are propagated by the driver to userspace:
|
|||
0x6021 ALARM: a sensor is too hot
|
||||
0x6022 ALARM: a sensor is extremely hot
|
||||
0x6030 System thermal table changed
|
||||
0x6040 Nvidia Optimus/AC adapter related (TO BE VERIFIED)
|
||||
|
||||
Battery nearly empty alarms are a last resort attempt to get the
|
||||
operating system to hibernate or shutdown cleanly (0x2313), or shutdown
|
||||
|
|
|
@ -66,7 +66,7 @@ MKISS_DRIVER_MAGIC 0x04bf mkiss_channel drivers/net/mkiss.h
|
|||
RISCOM8_MAGIC 0x0907 riscom_port drivers/char/riscom8.h
|
||||
SPECIALIX_MAGIC 0x0907 specialix_port drivers/char/specialix_io8.h
|
||||
HDLC_MAGIC 0x239e n_hdlc drivers/char/n_hdlc.c
|
||||
APM_BIOS_MAGIC 0x4101 apm_user arch/i386/kernel/apm.c
|
||||
APM_BIOS_MAGIC 0x4101 apm_user arch/x86/kernel/apm_32.c
|
||||
CYCLADES_MAGIC 0x4359 cyclades_port include/linux/cyclades.h
|
||||
DB_MAGIC 0x4442 fc_info drivers/net/iph5526_novram.c
|
||||
DL_MAGIC 0x444d fc_info drivers/net/iph5526_novram.c
|
||||
|
|
|
@ -11,7 +11,7 @@ Adapter Detection
|
|||
|
||||
The ideal MCA adapter detection is done through the use of the
|
||||
Programmable Option Select registers. Generic functions for doing
|
||||
this have been added in include/linux/mca.h and arch/i386/kernel/mca.c.
|
||||
this have been added in include/linux/mca.h and arch/x86/kernel/mca_32.c.
|
||||
Everything needed to detect adapters and read (and write) configuration
|
||||
information is there. A number of MCA-specific drivers already use
|
||||
this. The typical probe code looks like the following:
|
||||
|
@ -81,7 +81,7 @@ more people use shared IRQs on PCI machines.
|
|||
In general, an interrupt must be acknowledged not only at the ICU (which
|
||||
is done automagically by the kernel), but at the device level. In
|
||||
particular, IRQ 0 must be reset after a timer interrupt (now done in
|
||||
arch/i386/kernel/time.c) or the first timer interrupt hangs the system.
|
||||
arch/x86/kernel/time.c) or the first timer interrupt hangs the system.
|
||||
There were also problems with the 1.3.x floppy drivers, but that seems
|
||||
to have been fixed.
|
||||
|
||||
|
|
|
@ -4,3 +4,5 @@ mmc-dev-attrs.txt
|
|||
- info on SD and MMC device attributes
|
||||
mmc-dev-parts.txt
|
||||
- info on SD and MMC device partitions
|
||||
mmc-async-req.txt
|
||||
- info on mmc asynchronous requests
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
Rationale
|
||||
=========
|
||||
|
||||
How significant is the cache maintenance overhead?
|
||||
It depends. Fast eMMC and multiple cache levels with speculative cache
|
||||
pre-fetch make the cache overhead relatively significant. If the DMA
|
||||
preparations for the next request are done in parallel with the current
|
||||
transfer, the DMA preparation overhead would not affect the MMC performance.
|
||||
The intention of non-blocking (asynchronous) MMC requests is to minimize the
|
||||
time between when an MMC request ends and another MMC request begins.
|
||||
Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
|
||||
dma_unmap_sg are processing. Using non-blocking MMC requests makes it
|
||||
possible to prepare the caches for next job in parallel with an active
|
||||
MMC request.
|
||||
|
||||
MMC block driver
|
||||
================
|
||||
|
||||
The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
|
||||
The increase in throughput is proportional to the time it takes to
|
||||
prepare (major part of preparations are dma_map_sg() and dma_unmap_sg())
|
||||
a request and how fast the memory is. The faster the MMC/SD is the
|
||||
more significant the prepare request time becomes. Roughly the expected
|
||||
performance gain is 5% for large writes and 10% on large reads on an L2 cache
|
||||
platform. In power save mode, when clocks run on a lower frequency, the DMA
|
||||
preparation may cost even more. As long as these slower preparations are run
|
||||
in parallel with the transfer, performance won't be affected.
|
||||
|
||||
Details on measurements from IOZone and mmc_test
|
||||
================================================
|
||||
|
||||
https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req
|
||||
|
||||
MMC core API extension
|
||||
======================
|
||||
|
||||
There is one new public function mmc_start_req().
|
||||
It starts a new MMC command request for a host. The function isn't
|
||||
truly non-blocking. If there is an ongoing async request it waits
|
||||
for completion of that request and starts the new one and returns. It
|
||||
doesn't wait for the new request to complete. If there is no ongoing
|
||||
request it starts the new request and returns immediately.
|
||||
|
||||
MMC host extensions
|
||||
===================
|
||||
|
||||
There are two optional members in the mmc_host_ops -- pre_req() and
|
||||
post_req() -- that the host driver may implement in order to move work
|
||||
to before and after the actual mmc_host_ops.request() function is called.
|
||||
In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
|
||||
descriptor, and post_req() runs the dma_unmap_sg().
|
||||
|
||||
Optimize for the first request
|
||||
==============================
|
||||
|
||||
The first request in a series of requests can't be prepared in parallel
|
||||
with the previous transfer, since there is no previous request.
|
||||
The argument is_first_req in pre_req() indicates that there is no previous
|
||||
request. The host driver may optimize for this scenario to minimize
|
||||
the performance loss. A way to optimize for this is to split the current
|
||||
request in two chunks, prepare the first chunk and start the request,
|
||||
and finally prepare the second chunk and start the transfer.
|
||||
|
||||
Pseudocode to handle is_first_req scenario with minimal prepare overhead:
|
||||
|
||||
if (is_first_req && req->size > threshold)
|
||||
/* start MMC transfer for the complete transfer size */
|
||||
mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
|
||||
|
||||
/*
|
||||
* Begin to prepare DMA while cmd is being processed by MMC.
|
||||
* The first chunk of the request should take the same time
|
||||
* to prepare as the "MMC process command time".
|
||||
* If prepare time exceeds MMC cmd time
|
||||
* the transfer is delayed, guesstimate max 4k as first chunk size.
|
||||
*/
|
||||
prepare_1st_chunk_for_dma(req);
|
||||
/* flush pending desc to the DMAC (dmaengine.h) */
|
||||
dma_issue_pending(req->dma_desc);
|
||||
|
||||
prepare_2nd_chunk_for_dma(req);
|
||||
/*
|
||||
* The second issue_pending should be called before MMC runs out
|
||||
* of the first chunk. If the MMC runs out of the first data chunk
|
||||
* before this call, the transfer is delayed.
|
||||
*/
|
||||
dma_issue_pending(req->dma_desc);
|
|
@ -260,7 +260,7 @@ int main(int argc, char *argv[])
|
|||
case 'V': opt_V++; exclusive++; break;
|
||||
|
||||
case '?':
|
||||
fprintf(stderr, usage_msg);
|
||||
fprintf(stderr, "%s", usage_msg);
|
||||
res = 2;
|
||||
goto out;
|
||||
}
|
||||
|
@ -268,13 +268,13 @@ int main(int argc, char *argv[])
|
|||
|
||||
/* options check */
|
||||
if (exclusive > 1) {
|
||||
fprintf(stderr, usage_msg);
|
||||
fprintf(stderr, "%s", usage_msg);
|
||||
res = 2;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (opt_v || opt_V) {
|
||||
printf(version);
|
||||
printf("%s", version);
|
||||
if (opt_V) {
|
||||
res = 0;
|
||||
goto out;
|
||||
|
@ -282,14 +282,14 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
|
||||
if (opt_u) {
|
||||
printf(usage_msg);
|
||||
printf("%s", usage_msg);
|
||||
res = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (opt_h) {
|
||||
printf(usage_msg);
|
||||
printf(help_msg);
|
||||
printf("%s", usage_msg);
|
||||
printf("%s", help_msg);
|
||||
res = 0;
|
||||
goto out;
|
||||
}
|
||||
|
@ -309,7 +309,7 @@ int main(int argc, char *argv[])
|
|||
goto out;
|
||||
} else {
|
||||
/* Just show usage */
|
||||
fprintf(stderr, usage_msg);
|
||||
fprintf(stderr, "%s", usage_msg);
|
||||
res = 2;
|
||||
goto out;
|
||||
}
|
||||
|
@ -320,7 +320,7 @@ int main(int argc, char *argv[])
|
|||
master_ifname = *spp++;
|
||||
|
||||
if (master_ifname == NULL) {
|
||||
fprintf(stderr, usage_msg);
|
||||
fprintf(stderr, "%s", usage_msg);
|
||||
res = 2;
|
||||
goto out;
|
||||
}
|
||||
|
@ -339,7 +339,7 @@ int main(int argc, char *argv[])
|
|||
|
||||
if (slave_ifname == NULL) {
|
||||
if (opt_d || opt_c) {
|
||||
fprintf(stderr, usage_msg);
|
||||
fprintf(stderr, "%s", usage_msg);
|
||||
res = 2;
|
||||
goto out;
|
||||
}
|
||||
|
|
|
@ -106,16 +106,6 @@ inet_peer_maxttl - INTEGER
|
|||
when the number of entries in the pool is very small).
|
||||
Measured in seconds.
|
||||
|
||||
inet_peer_gc_mintime - INTEGER
|
||||
Minimum interval between garbage collection passes. This interval is
|
||||
in effect under high memory pressure on the pool.
|
||||
Measured in seconds.
|
||||
|
||||
inet_peer_gc_maxtime - INTEGER
|
||||
Minimum interval between garbage collection passes. This interval is
|
||||
in effect under low (or absent) memory pressure on the pool.
|
||||
Measured in seconds.
|
||||
|
||||
TCP variables:
|
||||
|
||||
somaxconn - INTEGER
|
||||
|
@ -346,7 +336,7 @@ tcp_orphan_retries - INTEGER
|
|||
when RTO retransmissions remain unacknowledged.
|
||||
See tcp_retries2 for more details.
|
||||
|
||||
The default value is 7.
|
||||
The default value is 8.
|
||||
If your machine is a loaded WEB server,
|
||||
you should think about lowering this value, such sockets
|
||||
may consume significant resources. Cf. tcp_max_orphans.
|
||||
|
@ -394,7 +384,7 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
|
|||
min: Minimal size of receive buffer used by TCP sockets.
|
||||
It is guaranteed to each TCP socket, even under moderate memory
|
||||
pressure.
|
||||
Default: 8K
|
||||
Default: 1 page
|
||||
|
||||
default: initial size of receive buffer used by TCP sockets.
|
||||
This value overrides net.core.rmem_default used by other protocols.
|
||||
|
@ -483,7 +473,7 @@ tcp_window_scaling - BOOLEAN
|
|||
tcp_wmem - vector of 3 INTEGERs: min, default, max
|
||||
min: Amount of memory reserved for send buffers for TCP sockets.
|
||||
Each TCP socket has rights to use it due to fact of its birth.
|
||||
Default: 4K
|
||||
Default: 1 page
|
||||
|
||||
default: initial size of send buffer used by TCP sockets. This
|
||||
value overrides net.core.wmem_default used by other protocols.
|
||||
|
@ -553,13 +543,13 @@ udp_rmem_min - INTEGER
|
|||
Minimal size of receive buffer used by UDP sockets in moderation.
|
||||
Each UDP socket is able to use the size for receiving data, even if
|
||||
total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
|
||||
Default: 4096
|
||||
Default: 1 page
|
||||
|
||||
udp_wmem_min - INTEGER
|
||||
Minimal size of send buffer used by UDP sockets in moderation.
|
||||
Each UDP socket is able to use the size for sending data, even if
|
||||
total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
|
||||
Default: 4096
|
||||
Default: 1 page
|
||||
|
||||
CIPSOv4 Variables:
|
||||
|
||||
|
@ -1465,10 +1455,17 @@ sctp_mem - vector of 3 INTEGERs: min, pressure, max
|
|||
Default is calculated at boot time from amount of available memory.
|
||||
|
||||
sctp_rmem - vector of 3 INTEGERs: min, default, max
|
||||
See tcp_rmem for a description.
|
||||
Only the first value ("min") is used, "default" and "max" are
|
||||
ignored.
|
||||
|
||||
min: Minimal size of receive buffer used by SCTP socket.
|
||||
It is guaranteed to each SCTP socket (but not association) even
|
||||
under moderate memory pressure.
|
||||
|
||||
Default: 1 page
|
||||
|
||||
sctp_wmem - vector of 3 INTEGERs: min, default, max
|
||||
See tcp_wmem for a description.
|
||||
Currently this tunable has no effect.
|
||||
|
||||
addr_scope_policy - INTEGER
|
||||
Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
Netdev features mess and how to get out from it alive
|
||||
=====================================================
|
||||
|
||||
Author:
|
||||
Michał Mirosław <mirq-linux@rere.qmqm.pl>
|
||||
|
||||
|
||||
|
||||
Part I: Feature sets
|
||||
======================
|
||||
|
||||
Long gone are the days when a network card would just take and give packets
|
||||
verbatim. Today's devices add multiple features and bugs (read: offloads)
|
||||
that relieve an OS of various tasks like generating and checking checksums,
|
||||
splitting packets, classifying them. Those capabilities and their state
|
||||
are commonly referred to as netdev features in the Linux kernel world.
|
||||
|
||||
There are currently three sets of features relevant to the driver, and
|
||||
one used internally by network core:
|
||||
|
||||
1. netdev->hw_features set contains features whose state may possibly
|
||||
be changed (enabled or disabled) for a particular device by user's
|
||||
request. This set should be initialized in ndo_init callback and not
|
||||
changed later.
|
||||
|
||||
2. netdev->features set contains features which are currently enabled
|
||||
for a device. This should be changed only by network core or in
|
||||
error paths of ndo_set_features callback.
|
||||
|
||||
3. netdev->vlan_features set contains features whose state is inherited
|
||||
by child VLAN devices (limits netdev->features set). This is currently
|
||||
used for all VLAN devices whether tags are stripped or inserted in
|
||||
hardware or software.
|
||||
|
||||
4. netdev->wanted_features set contains feature set requested by user.
|
||||
This set is filtered by ndo_fix_features callback whenever it or
|
||||
some device-specific conditions change. This set is internal to
|
||||
networking core and should not be referenced in drivers.
|
||||
|
||||
|
||||
|
||||
Part II: Controlling enabled features
|
||||
=======================================
|
||||
|
||||
When current feature set (netdev->features) is to be changed, new set
|
||||
is calculated and filtered by calling ndo_fix_features callback
|
||||
and netdev_fix_features(). If the resulting set differs from current
|
||||
set, it is passed to ndo_set_features callback and (if the callback
|
||||
returns success) replaces value stored in netdev->features.
|
||||
NETDEV_FEAT_CHANGE notification is issued after that whenever current
|
||||
set might have changed.
|
||||
|
||||
The following events trigger recalculation:
|
||||
1. device's registration, after ndo_init returned success
|
||||
2. user requested changes in features state
|
||||
3. netdev_update_features() is called
|
||||
|
||||
ndo_*_features callbacks are called with rtnl_lock held. Missing callbacks
|
||||
are treated as always returning success.
|
||||
|
||||
A driver that wants to trigger recalculation must do so by calling
|
||||
netdev_update_features() while holding rtnl_lock. This should not be done
|
||||
from ndo_*_features callbacks. netdev->features should not be modified by
|
||||
driver except by means of ndo_fix_features callback.
|
||||
|
||||
|
||||
|
||||
Part III: Implementation hints
|
||||
================================
|
||||
|
||||
* ndo_fix_features:
|
||||
|
||||
All dependencies between features should be resolved here. The resulting
|
||||
set can be reduced further by networking core imposed limitations (as coded
|
||||
in netdev_fix_features()). For this reason it is safer to disable a feature
|
||||
when its dependencies are not met instead of forcing the dependency on.
|
||||
|
||||
This callback should not modify hardware nor driver state (should be
|
||||
stateless). It can be called multiple times between successive
|
||||
ndo_set_features calls.
|
||||
|
||||
Callback must not alter features contained in NETIF_F_SOFT_FEATURES or
|
||||
NETIF_F_NEVER_CHANGE sets. The exception is NETIF_F_VLAN_CHALLENGED but
|
||||
care must be taken as the change won't affect already configured VLANs.
|
||||
|
||||
* ndo_set_features:
|
||||
|
||||
Hardware should be reconfigured to match passed feature set. The set
|
||||
should not be altered unless some error condition happens that can't
|
||||
be reliably detected in ndo_fix_features. In this case, the callback
|
||||
should update netdev->features to match resulting hardware state.
|
||||
Errors returned are not (and cannot be) propagated anywhere except dmesg.
|
||||
(Note: successful return is zero, >0 means silent error.)
|
||||
|
||||
|
||||
|
||||
Part IV: Features
|
||||
===================
|
||||
|
||||
For current list of features, see include/linux/netdev_features.h.
|
||||
This section describes semantics of some of them.
|
||||
|
||||
* Transmit checksumming
|
||||
|
||||
For complete description, see comments near the top of include/linux/skbuff.h.
|
||||
|
||||
Note: NETIF_F_HW_CSUM is a superset of NETIF_F_IP_CSUM + NETIF_F_IPV6_CSUM.
|
||||
It means that device can fill TCP/UDP-like checksum anywhere in the packets
|
||||
whatever headers there might be.
|
||||
|
||||
* Transmit TCP segmentation offload
|
||||
|
||||
NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit
|
||||
set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6).
|
||||
|
||||
* Transmit DMA from high memory
|
||||
|
||||
On platforms where this is relevant, NETIF_F_HIGHDMA signals that
|
||||
ndo_start_xmit can handle skbs with frags in high memory.
|
||||
|
||||
* Transmit scatter-gather
|
||||
|
||||
Those features say that ndo_start_xmit can handle fragmented skbs:
|
||||
NETIF_F_SG --- paged skbs (skb_shinfo()->frags), NETIF_F_FRAGLIST ---
|
||||
chained skbs (skb->next/prev list).
|
||||
|
||||
* Software features
|
||||
|
||||
Features contained in NETIF_F_SOFT_FEATURES are features of networking
|
||||
stack. Driver should not change behaviour based on them.
|
||||
|
||||
* LLTX driver (deprecated for hardware drivers)
|
||||
|
||||
NETIF_F_LLTX should be set in drivers that implement their own locking in
|
||||
transmit path or don't need locking at all (e.g. software tunnels).
|
||||
In ndo_start_xmit, it is recommended to use a try_lock and return
|
||||
NETDEV_TX_LOCKED when the spin lock fails. The locking should also properly
|
||||
protect against other callbacks (the rules you need to find out).
|
||||
|
||||
Don't use it for new drivers.
|
||||
|
||||
* netns-local device
|
||||
|
||||
NETIF_F_NETNS_LOCAL is set for devices that are not allowed to move between
|
||||
network namespaces (e.g. loopback).
|
||||
|
||||
Don't use it in drivers.
|
||||
|
||||
* VLAN challenged
|
||||
|
||||
NETIF_F_VLAN_CHALLENGED should be set for devices which can't cope with VLAN
|
||||
headers. Some drivers set this because the cards can't handle the bigger MTU.
|
||||
[FIXME: Those cases could be fixed in VLAN code by allowing only reduced-MTU
|
||||
VLANs. This may not be useful, though.]
|
|
@ -0,0 +1,128 @@
|
|||
Linux NFC subsystem
|
||||
===================
|
||||
|
||||
The Near Field Communication (NFC) subsystem is required to standardize the
|
||||
NFC device drivers development and to create a unified userspace interface.
|
||||
|
||||
This document covers the architecture overview, the device driver interface
|
||||
description and the userspace interface description.
|
||||
|
||||
Architecture overview
|
||||
---------------------
|
||||
|
||||
The NFC subsystem is responsible for:
|
||||
- NFC adapters management;
|
||||
- Polling for targets;
|
||||
- Low-level data exchange;
|
||||
|
||||
The subsystem is divided into several parts. The 'core' is responsible for
|
||||
providing the device driver interface. On the other side, it is also
|
||||
responsible for providing an interface to control operations and low-level
|
||||
data exchange.
|
||||
|
||||
The control operations are available to userspace via generic netlink.
|
||||
|
||||
The low-level data exchange interface is provided by the new socket family
|
||||
PF_NFC. The NFC_SOCKPROTO_RAW performs raw communication with NFC targets.
|
||||
|
||||
|
||||
+--------------------------------------+
|
||||
| USER SPACE |
|
||||
+--------------------------------------+
|
||||
^ ^
|
||||
| low-level | control
|
||||
| data exchange | operations
|
||||
| |
|
||||
| v
|
||||
| +-----------+
|
||||
| AF_NFC | netlink |
|
||||
| socket +-----------+
|
||||
| raw ^
|
||||
| |
|
||||
v v
|
||||
+---------+ +-----------+
|
||||
| rawsock | <--------> | core |
|
||||
+---------+ +-----------+
|
||||
^
|
||||
|
|
||||
v
|
||||
+-----------+
|
||||
| driver |
|
||||
+-----------+
|
||||
|
||||
Device Driver Interface
|
||||
-----------------------
|
||||
|
||||
When registering on the NFC subsystem, the device driver must inform the core
|
||||
of the set of supported NFC protocols and the set of ops callbacks. The ops
|
||||
callbacks that must be implemented are the following:
|
||||
|
||||
* start_poll - set up the device to poll for targets
|
||||
* stop_poll - stop an in-progress polling operation
|
||||
* activate_target - select and initialize one of the targets found
|
||||
* deactivate_target - deselect and deinitialize the selected target
|
||||
* data_exchange - send data and receive the response (transceive operation)
|
||||
|
||||
Userspace interface
|
||||
--------------------
|
||||
|
||||
The userspace interface is divided into control operations and low-level data
|
||||
exchange operation.
|
||||
|
||||
CONTROL OPERATIONS:
|
||||
|
||||
Generic netlink is used to implement the interface to the control operations.
|
||||
The operations are composed of commands and events, all listed below:
|
||||
|
||||
* NFC_CMD_GET_DEVICE - get specific device info or dump the device list
|
||||
* NFC_CMD_START_POLL - set up a specific device to poll for targets
|
||||
* NFC_CMD_STOP_POLL - stop the polling operation in a specific device
|
||||
* NFC_CMD_GET_TARGET - dump the list of targets found by a specific device
|
||||
|
||||
* NFC_EVENT_DEVICE_ADDED - reports an NFC device addition
|
||||
* NFC_EVENT_DEVICE_REMOVED - reports an NFC device removal
|
||||
* NFC_EVENT_TARGETS_FOUND - reports START_POLL results when 1 or more targets
|
||||
are found
|
||||
|
||||
The user must call START_POLL to poll for NFC targets, passing the desired NFC
|
||||
protocols through NFC_ATTR_PROTOCOLS attribute. The device remains in polling
|
||||
state until it finds any target. However, the user can stop the polling
|
||||
operation by calling STOP_POLL command. In this case, it will be checked if
|
||||
the requester of STOP_POLL is the same as that of START_POLL.
|
||||
|
||||
If the polling operation finds one or more targets, the event TARGETS_FOUND is
|
||||
sent (including the device id). The user must call GET_TARGET to get the list of
|
||||
all targets found by such device. Each reply message has target attributes with
|
||||
relevant information such as the supported NFC protocols.
|
||||
|
||||
All polling operations requested through one netlink socket are stopped when
|
||||
it's closed.
|
||||
|
||||
LOW-LEVEL DATA EXCHANGE:
|
||||
|
||||
The userspace must use PF_NFC sockets to perform any data communication with
|
||||
targets. All NFC sockets use AF_NFC:
|
||||
|
||||
struct sockaddr_nfc {
|
||||
sa_family_t sa_family;
|
||||
__u32 dev_idx;
|
||||
__u32 target_idx;
|
||||
__u32 nfc_protocol;
|
||||
};
|
||||
|
||||
To establish a connection with one target, the user must create an
|
||||
NFC_SOCKPROTO_RAW socket and call the 'connect' syscall with the sockaddr_nfc
|
||||
struct correctly filled. All information comes from NFC_EVENT_TARGETS_FOUND
|
||||
netlink event. As a target can support more than one NFC protocol, the user
|
||||
must inform which protocol it wants to use.
|
||||
|
||||
Internally, 'connect' will result in an activate_target call to the driver.
|
||||
When the socket is closed, the target is deactivated.
|
||||
|
||||
The data format exchanged through the sockets is NFC protocol dependent. For
|
||||
instance, when communicating with MIFARE tags, the data exchanged are MIFARE
|
||||
commands and their responses.
|
||||
|
||||
The first received package is the response to the first sent package and so
|
||||
on. In order to allow valid "empty" responses, every data received has a NULL
|
||||
header of 1 byte.
|
|
@ -7,7 +7,7 @@ This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers
|
|||
(Synopsys IP blocks); it has been fully tested on STLinux platforms.
|
||||
|
||||
Currently this network device driver is for all STM embedded MAC/GMAC
|
||||
(7xxx SoCs). Other platforms start using it i.e. ARM SPEAr.
|
||||
(i.e. 7xxx/5xxx SoCs) and it is known to work on other platforms, e.g. ARM SPEAr.
|
||||
|
||||
DWC Ether MAC 10/100/1000 Universal version 3.41a and DWC Ether MAC 10/100
|
||||
Universal version 4.0 have been used for developing the first code
|
||||
|
@ -71,7 +71,7 @@ Several performance tests on STM platforms showed this optimisation allows to sp
|
|||
the CPU while having the maximum throughput.
|
||||
|
||||
4.4) WOL
|
||||
Wake up on Lan feature through Magic Frame is only supported for the GMAC
|
||||
Wake up on LAN through Magic and Unicast frames is supported for the GMAC
|
||||
core.
|
||||
|
||||
4.5) DMA descriptors
|
||||
|
@ -91,11 +91,15 @@ LRO is not supported.
|
|||
The driver is compatible with PAL to work with PHY and GPHY devices.
|
||||
|
||||
4.9) Platform information
|
||||
Several information came from the platform; please refer to the
|
||||
driver's Header file in include/linux directory.
|
||||
Several pieces of driver information can be passed through the platform.
|
||||
These are included in the include/linux/stmmac.h header file
|
||||
and detailed below as well:
|
||||
|
||||
struct plat_stmmacenet_data {
|
||||
struct plat_stmmacenet_data {
|
||||
int bus_id;
|
||||
int phy_addr;
|
||||
int interface;
|
||||
struct stmmac_mdio_bus_data *mdio_bus_data;
|
||||
int pbl;
|
||||
int clk_csr;
|
||||
int has_gmac;
|
||||
|
@ -103,67 +107,135 @@ struct plat_stmmacenet_data {
|
|||
int tx_coe;
|
||||
int bugged_jumbo;
|
||||
int pmt;
|
||||
void (*fix_mac_speed)(void *priv, unsigned int speed);
|
||||
void (*bus_setup)(unsigned long ioaddr);
|
||||
#ifdef CONFIG_STM_DRIVERS
|
||||
struct stm_pad_config *pad_config;
|
||||
#endif
|
||||
void *bsp_priv;
|
||||
};
|
||||
int force_sf_dma_mode;
|
||||
void (*fix_mac_speed)(void *priv, unsigned int speed);
|
||||
void (*bus_setup)(void __iomem *ioaddr);
|
||||
int (*init)(struct platform_device *pdev);
|
||||
void (*exit)(struct platform_device *pdev);
|
||||
void *bsp_priv;
|
||||
};
|
||||
|
||||
Where:
|
||||
- pbl (Programmable Burst Length) is maximum number of
|
||||
beats to be transferred in one DMA transaction.
|
||||
GMAC also enables the 4xPBL by default.
|
||||
- fix_mac_speed and bus_setup are used to configure internal target
|
||||
registers (on STM platforms);
|
||||
- has_gmac: GMAC core is on board (get it at run-time in the next step);
|
||||
- bus_id: bus identifier.
|
||||
- tx_coe: core is able to perform the tx csum in HW.
|
||||
- enh_desc: if sets the MAC will use the enhanced descriptor structure.
|
||||
- clk_csr: CSR Clock range selection.
|
||||
- bugged_jumbo: some HWs are not able to perform the csum in HW for
|
||||
over-sized frames due to limited buffer sizes. Setting this
|
||||
flag the csum will be done in SW on JUMBO frames.
|
||||
o bus_id: bus identifier.
|
||||
o phy_addr: the physical address can be passed from the platform.
|
||||
If it is set to -1 the driver will automatically
|
||||
detect it at run-time by probing all the 32 addresses.
|
||||
o interface: PHY device's interface.
|
||||
o mdio_bus_data: specific platform fields for the MDIO bus.
|
||||
o pbl: the Programmable Burst Length is maximum number of beats to
|
||||
be transferred in one DMA transaction.
|
||||
GMAC also enables the 4xPBL by default.
|
||||
o clk_csr: CSR Clock range selection.
|
||||
o has_gmac: uses the GMAC core.
|
||||
o enh_desc: if set, the MAC will use the enhanced descriptor structure.
|
||||
o tx_coe: core is able to perform the tx csum in HW.
|
||||
o bugged_jumbo: some HWs are not able to perform the csum in HW for
|
||||
over-sized frames due to limited buffer sizes.
|
||||
When this flag is set, the csum will be done in SW on
|
||||
JUMBO frames.
|
||||
o pmt: core has the embedded power module (optional).
|
||||
o force_sf_dma_mode: force DMA to use the Store and Forward mode
|
||||
instead of the Threshold.
|
||||
o fix_mac_speed: this callback is used for modifying some syscfg registers
|
||||
(on ST SoCs) according to the link speed negotiated by the
|
||||
physical layer.
|
||||
o bus_setup: perform HW setup of the bus. For example, on some ST platforms
|
||||
this field is used to configure the AMBA bridge to generate more
|
||||
efficient STBus traffic.
|
||||
o init/exit: callbacks used for calling a custom initialisation;
|
||||
this is sometimes necessary on some platforms (e.g. ST boxes)
|
||||
where the HW needs to have set some PIO lines or system cfg
|
||||
registers.
|
||||
o custom_cfg: this is a custom configuration that can be passed while
|
||||
initialising the resources.
|
||||
|
||||
struct plat_stmmacphy_data {
|
||||
int bus_id;
|
||||
int phy_addr;
|
||||
unsigned int phy_mask;
|
||||
int interface;
|
||||
int (*phy_reset)(void *priv);
|
||||
void *priv;
|
||||
};
|
||||
Then we have:
|
||||
|
||||
struct stmmac_mdio_bus_data {
|
||||
int bus_id;
|
||||
int (*phy_reset)(void *priv);
|
||||
unsigned int phy_mask;
|
||||
int *irqs;
|
||||
int probed_phy_irq;
|
||||
};
|
||||
|
||||
Where:
|
||||
- bus_id: bus identifier;
|
||||
- phy_addr: physical address used for the attached phy device;
|
||||
set it to -1 to get it at run-time;
|
||||
- interface: physical MII interface mode;
|
||||
- phy_reset: hook to reset HW function.
|
||||
o bus_id: bus identifier;
|
||||
o phy_reset: hook to reset the phy device attached to the bus.
|
||||
o phy_mask: phy mask passed when registering the MDIO bus within the driver.
|
||||
o irqs: list of IRQs, one per PHY.
|
||||
o probed_phy_irq: if irqs is NULL, use this for probed PHY.
|
||||
|
||||
SOURCES:
|
||||
- Kconfig
|
||||
- Makefile
|
||||
- stmmac_main.c: main network device driver;
|
||||
- stmmac_mdio.c: mdio functions;
|
||||
- stmmac_ethtool.c: ethtool support;
|
||||
- stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts
|
||||
Only tested on ST40 platforms based.
|
||||
- stmmac.h: private driver structure;
|
||||
- common.h: common definitions and VFTs;
|
||||
- descs.h: descriptor structure definitions;
|
||||
- dwmac1000_core.c: GMAC core functions;
|
||||
- dwmac1000_dma.c: dma functions for the GMAC chip;
|
||||
- dwmac1000.h: specific header file for the GMAC;
|
||||
- dwmac100_core: MAC 100 core and dma code;
|
||||
- dwmac100_dma.c: dma funtions for the MAC chip;
|
||||
- dwmac1000.h: specific header file for the MAC;
|
||||
- dwmac_lib.c: generic DMA functions shared among chips
|
||||
- enh_desc.c: functions for handling enhanced descriptors
|
||||
- norm_desc.c: functions for handling normal descriptors
|
||||
Below is an example of how the structures above are used on ST platforms.
|
||||
|
||||
TODO:
|
||||
- XGMAC controller is not supported.
|
||||
- Review the timer optimisation code to use an embedded device that seems to be
|
||||
static struct plat_stmmacenet_data stxYYY_ethernet_platform_data = {
|
||||
.pbl = 32,
|
||||
.has_gmac = 0,
|
||||
.enh_desc = 0,
|
||||
.fix_mac_speed = stxYYY_ethernet_fix_mac_speed,
|
||||
|
|
||||
|-> to write an internal syscfg
|
||||
| on this platform when the
|
||||
| link speed changes from 10 to
|
||||
| 100 and viceversa
|
||||
.init = &stmmac_claim_resource,
|
||||
|
|
||||
|-> On ST SoC this calls own "PAD"
|
||||
| manager framework to claim
|
||||
| all the resources necessary
|
||||
| (GPIO ...). The .custom_cfg field
|
||||
| is used to pass a custom config.
|
||||
};
|
||||
|
||||
Below the usage of the stmmac_mdio_bus_data: on this SoC, in fact,
|
||||
there are two MAC cores: one MAC is for MDIO Bus/PHY emulation
|
||||
with fixed_link support.
|
||||
|
||||
static struct stmmac_mdio_bus_data stmmac1_mdio_bus = {
|
||||
.bus_id = 1,
|
||||
|
|
||||
|-> phy device on the bus_id 1
|
||||
.phy_reset = phy_reset,
|
||||
|
|
||||
|-> function to provide the phy_reset on this board
|
||||
.phy_mask = 0,
|
||||
};
|
||||
|
||||
static struct fixed_phy_status stmmac0_fixed_phy_status = {
|
||||
.link = 1,
|
||||
.speed = 100,
|
||||
.duplex = 1,
|
||||
};
|
||||
|
||||
During the board's device_init we can configure the first
|
||||
MAC for fixed_link by calling:
|
||||
fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status);
|
||||
and the second one, with a real PHY device attached to the bus,
|
||||
by using the stmmac_mdio_bus_data structure (to provide the id, the
|
||||
reset procedure etc).
|
||||
|
||||
4.10) List of source files:
|
||||
o Kconfig
|
||||
o Makefile
|
||||
o stmmac_main.c: main network device driver;
|
||||
o stmmac_mdio.c: mdio functions;
|
||||
o stmmac_ethtool.c: ethtool support;
|
||||
o stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts
|
||||
Only tested on ST40-based platforms.
|
||||
o stmmac.h: private driver structure;
|
||||
o common.h: common definitions and VFTs;
|
||||
o descs.h: descriptor structure definitions;
|
||||
o dwmac1000_core.c: GMAC core functions;
|
||||
o dwmac1000_dma.c: dma functions for the GMAC chip;
|
||||
o dwmac1000.h: specific header file for the GMAC;
|
||||
o dwmac100_core.c: MAC 100 core and dma code;
|
||||
o dwmac100_dma.c: dma functions for the MAC chip;
|
||||
o dwmac100.h: specific header file for the MAC;
|
||||
o dwmac_lib.c: generic DMA functions shared among chips
|
||||
o enh_desc.c: functions for handling enhanced descriptors
|
||||
o norm_desc.c: functions for handling normal descriptors
|
||||
|
||||
5) TODO:
|
||||
o XGMAC is not supported.
|
||||
o Review the timer optimisation code to use an embedded device that will be
|
||||
available in new chip generations.
|
||||
|
|
|
@ -506,8 +506,8 @@ routines. Nevertheless, different callback pointers are used in case there is a
|
|||
situation where it actually matters.
|
||||
|
||||
|
||||
Device Power Domains
|
||||
--------------------
|
||||
Device Power Management Domains
|
||||
-------------------------------
|
||||
Sometimes devices share reference clocks or other power resources. In those
|
||||
cases it generally is not possible to put devices into low-power states
|
||||
individually. Instead, a set of devices sharing a power resource can be put
|
||||
|
@ -516,8 +516,8 @@ power resource. Of course, they also need to be put into the full-power state
|
|||
together, by turning the shared power resource on. A set of devices with this
|
||||
property is often referred to as a power domain.
|
||||
|
||||
Support for power domains is provided through the pwr_domain field of struct
|
||||
device. This field is a pointer to an object of type struct dev_power_domain,
|
||||
Support for power domains is provided through the pm_domain field of struct
|
||||
device. This field is a pointer to an object of type struct dev_pm_domain,
|
||||
defined in include/linux/pm.h, providing a set of power management callbacks
|
||||
analogous to the subsystem-level and device driver callbacks that are executed
|
||||
for the given device during all power transitions, instead of the respective
|
||||
|
@ -604,7 +604,7 @@ state temporarily, for example so that its system wakeup capability can be
|
|||
disabled. This all depends on the hardware and the design of the subsystem and
|
||||
device driver in question.
|
||||
|
||||
During system-wide resume from a sleep state it's best to put devices into the
|
||||
full-power state, as explained in Documentation/power/runtime_pm.txt. Refer to
|
||||
that document for more information regarding this particular issue as well as
|
||||
During system-wide resume from a sleep state it's easiest to put devices into
|
||||
the full-power state, as explained in Documentation/power/runtime_pm.txt. Refer
|
||||
to that document for more information regarding this particular issue as well as
|
||||
for information on the device runtime power management framework in general.
|
||||
|
|
|
@ -321,6 +321,8 @@ opp_init_cpufreq_table - cpufreq framework typically is initialized with
|
|||
addition to CONFIG_PM as power management feature is required to
|
||||
dynamically scale voltage and frequency in a system.
|
||||
|
||||
opp_free_cpufreq_table - Free up the table allocated by opp_init_cpufreq_table
|
||||
|
||||
7. Data Structures
|
||||
==================
|
||||
Typically an SoC contains multiple voltage domains which are variable. Each
|
||||
|
|
|
@ -1,39 +1,39 @@
|
|||
Run-time Power Management Framework for I/O Devices
|
||||
Runtime Power Management Framework for I/O Devices
|
||||
|
||||
(C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
|
||||
(C) 2010 Alan Stern <stern@rowland.harvard.edu>
|
||||
|
||||
1. Introduction
|
||||
|
||||
Support for run-time power management (run-time PM) of I/O devices is provided
|
||||
Support for runtime power management (runtime PM) of I/O devices is provided
|
||||
at the power management core (PM core) level by means of:
|
||||
|
||||
* The power management workqueue pm_wq in which bus types and device drivers can
|
||||
put their PM-related work items. It is strongly recommended that pm_wq be
|
||||
used for queuing all work items related to run-time PM, because this allows
|
||||
used for queuing all work items related to runtime PM, because this allows
|
||||
them to be synchronized with system-wide power transitions (suspend to RAM,
|
||||
hibernation and resume from system sleep states). pm_wq is declared in
|
||||
include/linux/pm_runtime.h and defined in kernel/power/main.c.
|
||||
|
||||
* A number of run-time PM fields in the 'power' member of 'struct device' (which
|
||||
* A number of runtime PM fields in the 'power' member of 'struct device' (which
|
||||
is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can
|
||||
be used for synchronizing run-time PM operations with one another.
|
||||
be used for synchronizing runtime PM operations with one another.
|
||||
|
||||
* Three device run-time PM callbacks in 'struct dev_pm_ops' (defined in
|
||||
* Three device runtime PM callbacks in 'struct dev_pm_ops' (defined in
|
||||
include/linux/pm.h).
|
||||
|
||||
* A set of helper functions defined in drivers/base/power/runtime.c that can be
|
||||
used for carrying out run-time PM operations in such a way that the
|
||||
used for carrying out runtime PM operations in such a way that the
|
||||
synchronization between them is taken care of by the PM core. Bus types and
|
||||
device drivers are encouraged to use these functions.
|
||||
|
||||
The run-time PM callbacks present in 'struct dev_pm_ops', the device run-time PM
|
||||
The runtime PM callbacks present in 'struct dev_pm_ops', the device runtime PM
|
||||
fields of 'struct dev_pm_info' and the core helper functions provided for
|
||||
run-time PM are described below.
|
||||
runtime PM are described below.
|
||||
|
||||
2. Device Run-time PM Callbacks
|
||||
2. Device Runtime PM Callbacks
|
||||
|
||||
There are three device run-time PM callbacks defined in 'struct dev_pm_ops':
|
||||
There are three device runtime PM callbacks defined in 'struct dev_pm_ops':
|
||||
|
||||
struct dev_pm_ops {
|
||||
...
|
||||
|
@ -72,11 +72,11 @@ knows what to do to handle the device).
|
|||
not mean that the device has been put into a low power state. It is
|
||||
supposed to mean, however, that the device will not process data and will
|
||||
not communicate with the CPU(s) and RAM until the subsystem-level resume
|
||||
callback is executed for it. The run-time PM status of a device after
|
||||
callback is executed for it. The runtime PM status of a device after
|
||||
successful execution of the subsystem-level suspend callback is 'suspended'.
|
||||
|
||||
* If the subsystem-level suspend callback returns -EBUSY or -EAGAIN,
|
||||
the device's run-time PM status is 'active', which means that the device
|
||||
the device's runtime PM status is 'active', which means that the device
|
||||
_must_ be fully operational afterwards.
|
||||
|
||||
* If the subsystem-level suspend callback returns an error code different
|
||||
|
@ -104,7 +104,7 @@ the device).
|
|||
|
||||
* Once the subsystem-level resume callback has completed successfully, the PM
|
||||
core regards the device as fully operational, which means that the device
|
||||
_must_ be able to complete I/O operations as needed. The run-time PM status
|
||||
_must_ be able to complete I/O operations as needed. The runtime PM status
|
||||
of the device is then 'active'.
|
||||
|
||||
* If the subsystem-level resume callback returns an error code, the PM core
|
||||
|
@ -130,7 +130,7 @@ device in that case. The value returned by this callback is ignored by the PM
|
|||
core.
|
||||
|
||||
The helper functions provided by the PM core, described in Section 4, guarantee
|
||||
that the following constraints are met with respect to the bus type's run-time
|
||||
that the following constraints are met with respect to the bus type's runtime
|
||||
PM callbacks:
|
||||
|
||||
(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute
|
||||
|
@ -142,7 +142,7 @@ PM callbacks:
|
|||
|
||||
(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active'
|
||||
devices (i.e. the PM core will only execute ->runtime_idle() or
|
||||
->runtime_suspend() for the devices the run-time PM status of which is
|
||||
->runtime_suspend() for the devices the runtime PM status of which is
|
||||
'active').
|
||||
|
||||
(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device
|
||||
|
@ -151,7 +151,7 @@ PM callbacks:
|
|||
flag of which is set.
|
||||
|
||||
(4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the
|
||||
PM core will only execute ->runtime_resume() for the devices the run-time
|
||||
PM core will only execute ->runtime_resume() for the devices the runtime
|
||||
PM status of which is 'suspended').
|
||||
|
||||
Additionally, the helper functions provided by the PM core obey the following
|
||||
|
@ -171,9 +171,9 @@ rules:
|
|||
scheduled requests to execute the other callbacks for the same device,
|
||||
except for scheduled autosuspends.
|
||||
|
||||
3. Run-time PM Device Fields
|
||||
3. Runtime PM Device Fields
|
||||
|
||||
The following device run-time PM fields are present in 'struct dev_pm_info', as
|
||||
The following device runtime PM fields are present in 'struct dev_pm_info', as
|
||||
defined in include/linux/pm.h:
|
||||
|
||||
struct timer_list suspend_timer;
|
||||
|
@ -205,7 +205,7 @@ defined in include/linux/pm.h:
|
|||
|
||||
unsigned int disable_depth;
|
||||
- used for disabling the helper functions (they work normally if this is
|
||||
equal to zero); the initial value of it is 1 (i.e. run-time PM is
|
||||
equal to zero); the initial value of it is 1 (i.e. runtime PM is
|
||||
initially disabled for all devices)
|
||||
|
||||
unsigned int runtime_error;
|
||||
|
@ -229,10 +229,10 @@ defined in include/linux/pm.h:
|
|||
suspend to complete; means "start a resume as soon as you've suspended"
|
||||
|
||||
unsigned int run_wake;
|
||||
- set if the device is capable of generating run-time wake-up events
|
||||
- set if the device is capable of generating runtime wake-up events
|
||||
|
||||
enum rpm_status runtime_status;
|
||||
- the run-time PM status of the device; this field's initial value is
|
||||
- the runtime PM status of the device; this field's initial value is
|
||||
RPM_SUSPENDED, which means that each device is initially regarded by the
|
||||
PM core as 'suspended', regardless of its real hardware status
|
||||
|
||||
|
@ -243,7 +243,7 @@ defined in include/linux/pm.h:
|
|||
and pm_runtime_forbid() helper functions
|
||||
|
||||
unsigned int no_callbacks;
|
||||
- indicates that the device does not use the run-time PM callbacks (see
|
||||
- indicates that the device does not use the runtime PM callbacks (see
|
||||
Section 8); it may be modified only by the pm_runtime_no_callbacks()
|
||||
helper function
|
||||
|
||||
|
@ -270,16 +270,16 @@ defined in include/linux/pm.h:
|
|||
|
||||
All of the above fields are members of the 'power' member of 'struct device'.
|
||||
|
||||
4. Run-time PM Device Helper Functions
|
||||
4. Runtime PM Device Helper Functions
|
||||
|
||||
The following run-time PM helper functions are defined in
|
||||
The following runtime PM helper functions are defined in
|
||||
drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
||||
|
||||
void pm_runtime_init(struct device *dev);
|
||||
- initialize the device run-time PM fields in 'struct dev_pm_info'
|
||||
- initialize the device runtime PM fields in 'struct dev_pm_info'
|
||||
|
||||
void pm_runtime_remove(struct device *dev);
|
||||
- make sure that the run-time PM of the device will be disabled after
|
||||
- make sure that the runtime PM of the device will be disabled after
|
||||
removing the device from device hierarchy
|
||||
|
||||
int pm_runtime_idle(struct device *dev);
|
||||
|
@ -289,9 +289,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
|
||||
int pm_runtime_suspend(struct device *dev);
|
||||
- execute the subsystem-level suspend callback for the device; returns 0 on
|
||||
success, 1 if the device's run-time PM status was already 'suspended', or
|
||||
success, 1 if the device's runtime PM status was already 'suspended', or
|
||||
error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt
|
||||
to suspend the device again in future
|
||||
to suspend the device again in future and -EACCES means that
|
||||
'power.disable_depth' is different from 0
|
||||
|
||||
int pm_runtime_autosuspend(struct device *dev);
|
||||
- same as pm_runtime_suspend() except that the autosuspend delay is taken
|
||||
|
@ -301,10 +302,11 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
|
||||
int pm_runtime_resume(struct device *dev);
|
||||
- execute the subsystem-level resume callback for the device; returns 0 on
|
||||
success, 1 if the device's run-time PM status was already 'active' or
|
||||
success, 1 if the device's runtime PM status was already 'active' or
|
||||
error code on failure, where -EAGAIN means it may be safe to attempt to
|
||||
resume the device again in future, but 'power.runtime_error' should be
|
||||
checked additionally
|
||||
checked additionally, and -EACCES means that 'power.disable_depth' is
|
||||
different from 0
|
||||
|
||||
int pm_request_idle(struct device *dev);
|
||||
- submit a request to execute the subsystem-level idle callback for the
|
||||
|
@ -321,7 +323,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
device in future, where 'delay' is the time to wait before queuing up a
|
||||
suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work
|
||||
item is queued up immediately); returns 0 on success, 1 if the device's PM
|
||||
run-time status was already 'suspended', or error code if the request
|
||||
runtime status was already 'suspended', or error code if the request
|
||||
hasn't been scheduled (or queued up if 'delay' is 0); if the execution of
|
||||
->runtime_suspend() is already scheduled and not yet expired, the new
|
||||
value of 'delay' will be used as the time to wait
|
||||
|
@ -329,7 +331,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
int pm_request_resume(struct device *dev);
|
||||
- submit a request to execute the subsystem-level resume callback for the
|
||||
device (the request is represented by a work item in pm_wq); returns 0 on
|
||||
success, 1 if the device's run-time PM status was already 'active', or
|
||||
success, 1 if the device's runtime PM status was already 'active', or
|
||||
error code if the request hasn't been queued up
|
||||
|
||||
void pm_runtime_get_noresume(struct device *dev);
|
||||
|
@ -367,22 +369,32 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
pm_runtime_autosuspend(dev) and return its result
|
||||
|
||||
void pm_runtime_enable(struct device *dev);
|
||||
- enable the run-time PM helper functions to run the device bus type's
|
||||
run-time PM callbacks described in Section 2
|
||||
- decrement the device's 'power.disable_depth' field; if that field is equal
|
||||
to zero, the runtime PM helper functions can execute subsystem-level
|
||||
callbacks described in Section 2 for the device
|
||||
|
||||
int pm_runtime_disable(struct device *dev);
|
||||
- prevent the run-time PM helper functions from running subsystem-level
|
||||
run-time PM callbacks for the device, make sure that all of the pending
|
||||
run-time PM operations on the device are either completed or canceled;
|
||||
- increment the device's 'power.disable_depth' field (if the value of that
|
||||
field was previously zero, this prevents subsystem-level runtime PM
|
||||
callbacks from being run for the device), make sure that all of the pending
|
||||
runtime PM operations on the device are either completed or canceled;
|
||||
returns 1 if there was a resume request pending and it was necessary to
|
||||
execute the subsystem-level resume callback for the device to satisfy that
|
||||
request, otherwise 0 is returned
|
||||
|
||||
int pm_runtime_barrier(struct device *dev);
|
||||
- check if there's a resume request pending for the device and resume it
|
||||
(synchronously) in that case, cancel any other pending runtime PM requests
|
||||
regarding it and wait for all runtime PM operations on it in progress to
|
||||
complete; returns 1 if there was a resume request pending and it was
|
||||
necessary to execute the subsystem-level resume callback for the device to
|
||||
satisfy that request, otherwise 0 is returned
|
||||
|
||||
void pm_suspend_ignore_children(struct device *dev, bool enable);
|
||||
- set/unset the power.ignore_children flag of the device
|
||||
|
||||
int pm_runtime_set_active(struct device *dev);
|
||||
- clear the device's 'power.runtime_error' flag, set the device's run-time
|
||||
- clear the device's 'power.runtime_error' flag, set the device's runtime
|
||||
PM status to 'active' and update its parent's counter of 'active'
|
||||
children as appropriate (it is only valid to use this function if
|
||||
'power.runtime_error' is set or 'power.disable_depth' is greater than
|
||||
|
@ -390,7 +402,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
which is not active and the 'power.ignore_children' flag of which is unset
|
||||
|
||||
void pm_runtime_set_suspended(struct device *dev);
|
||||
- clear the device's 'power.runtime_error' flag, set the device's run-time
|
||||
- clear the device's 'power.runtime_error' flag, set the device's runtime
|
||||
PM status to 'suspended' and update its parent's counter of 'active'
|
||||
children as appropriate (it is only valid to use this function if
|
||||
'power.runtime_error' is set or 'power.disable_depth' is greater than
|
||||
|
@ -400,6 +412,9 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
- return true if the device's runtime PM status is 'suspended' and its
|
||||
'power.disable_depth' field is equal to zero, or false otherwise
|
||||
|
||||
bool pm_runtime_status_suspended(struct device *dev);
|
||||
- return true if the device's runtime PM status is 'suspended'
|
||||
|
||||
void pm_runtime_allow(struct device *dev);
|
||||
- set the power.runtime_auto flag for the device and decrease its usage
|
||||
counter (used by the /sys/devices/.../power/control interface to
|
||||
|
@ -411,7 +426,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
effectively prevent the device from being power managed at run time)
|
||||
|
||||
void pm_runtime_no_callbacks(struct device *dev);
|
||||
- set the power.no_callbacks flag for the device and remove the run-time
|
||||
- set the power.no_callbacks flag for the device and remove the runtime
|
||||
PM attributes from /sys/devices/.../power (or prevent them from being
|
||||
added when the device is registered)
|
||||
|
||||
|
@ -431,7 +446,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
|
||||
void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
|
||||
- set the power.autosuspend_delay value to 'delay' (expressed in
|
||||
milliseconds); if 'delay' is negative then run-time suspends are
|
||||
milliseconds); if 'delay' is negative then runtime suspends are
|
||||
prevented
|
||||
|
||||
unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
|
||||
|
@ -470,35 +485,35 @@ pm_runtime_resume()
|
|||
pm_runtime_get_sync()
|
||||
pm_runtime_put_sync_suspend()
|
||||
|
||||
5. Run-time PM Initialization, Device Probing and Removal
|
||||
5. Runtime PM Initialization, Device Probing and Removal
|
||||
|
||||
Initially, the run-time PM is disabled for all devices, which means that the
|
||||
majority of the run-time PM helper functions described in Section 4 will return
|
||||
Initially, the runtime PM is disabled for all devices, which means that the
|
||||
majority of the runtime PM helper functions described in Section 4 will return
|
||||
-EAGAIN until pm_runtime_enable() is called for the device.
|
||||
|
||||
In addition to that, the initial run-time PM status of all devices is
|
||||
In addition to that, the initial runtime PM status of all devices is
|
||||
'suspended', but it need not reflect the actual physical state of the device.
|
||||
Thus, if the device is initially active (i.e. it is able to process I/O), its
|
||||
run-time PM status must be changed to 'active', with the help of
|
||||
runtime PM status must be changed to 'active', with the help of
|
||||
pm_runtime_set_active(), before pm_runtime_enable() is called for the device.
|
||||
|
||||
However, if the device has a parent and the parent's run-time PM is enabled,
|
||||
However, if the device has a parent and the parent's runtime PM is enabled,
|
||||
calling pm_runtime_set_active() for the device will affect the parent, unless
|
||||
the parent's 'power.ignore_children' flag is set. Namely, in that case the
|
||||
parent won't be able to suspend at run time, using the PM core's helper
|
||||
functions, as long as the child's status is 'active', even if the child's
|
||||
run-time PM is still disabled (i.e. pm_runtime_enable() hasn't been called for
|
||||
runtime PM is still disabled (i.e. pm_runtime_enable() hasn't been called for
|
||||
the child yet or pm_runtime_disable() has been called for it). For this reason,
|
||||
once pm_runtime_set_active() has been called for the device, pm_runtime_enable()
|
||||
should be called for it too as soon as reasonably possible or its run-time PM
|
||||
should be called for it too as soon as reasonably possible or its runtime PM
|
||||
status should be changed back to 'suspended' with the help of
|
||||
pm_runtime_set_suspended().
|
||||
|
||||
If the default initial run-time PM status of the device (i.e. 'suspended')
|
||||
If the default initial runtime PM status of the device (i.e. 'suspended')
|
||||
reflects the actual state of the device, its bus type's or its driver's
|
||||
->probe() callback will likely need to wake it up using one of the PM core's
|
||||
helper functions described in Section 4. In that case, pm_runtime_resume()
|
||||
should be used. Of course, for this purpose the device's run-time PM has to be
|
||||
should be used. Of course, for this purpose the device's runtime PM has to be
|
||||
enabled earlier by calling pm_runtime_enable().
|
||||
|
||||
If the device bus type's or driver's ->probe() callback runs
|
||||
|
@ -529,33 +544,33 @@ The user space can effectively disallow the driver of the device to power manage
|
|||
it at run time by changing the value of its /sys/devices/.../power/control
|
||||
attribute to "on", which causes pm_runtime_forbid() to be called. In principle,
|
||||
this mechanism may also be used by the driver to effectively turn off the
|
||||
run-time power management of the device until the user space turns it on.
|
||||
Namely, during the initialization the driver can make sure that the run-time PM
|
||||
runtime power management of the device until the user space turns it on.
|
||||
Namely, during the initialization the driver can make sure that the runtime PM
|
||||
status of the device is 'active' and call pm_runtime_forbid(). It should be
|
||||
noted, however, that if the user space has already intentionally changed the
|
||||
value of /sys/devices/.../power/control to "auto" to allow the driver to power
|
||||
manage the device at run time, the driver may confuse it by using
|
||||
pm_runtime_forbid() this way.
|
||||
|
||||
6. Run-time PM and System Sleep
|
||||
6. Runtime PM and System Sleep
|
||||
|
||||
Run-time PM and system sleep (i.e., system suspend and hibernation, also known
|
||||
Runtime PM and system sleep (i.e., system suspend and hibernation, also known
|
||||
as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of
|
||||
ways. If a device is active when a system sleep starts, everything is
|
||||
straightforward. But what should happen if the device is already suspended?
|
||||
|
||||
The device may have different wake-up settings for run-time PM and system sleep.
|
||||
For example, remote wake-up may be enabled for run-time suspend but disallowed
|
||||
The device may have different wake-up settings for runtime PM and system sleep.
|
||||
For example, remote wake-up may be enabled for runtime suspend but disallowed
|
||||
for system sleep (device_may_wakeup(dev) returns 'false'). When this happens,
|
||||
the subsystem-level system suspend callback is responsible for changing the
|
||||
device's wake-up setting (it may leave that to the device driver's system
|
||||
suspend routine). It may be necessary to resume the device and suspend it again
|
||||
in order to do so. The same is true if the driver uses different power levels
|
||||
or other settings for run-time suspend and system sleep.
|
||||
or other settings for runtime suspend and system sleep.
|
||||
|
||||
During system resume, devices generally should be brought back to full power,
|
||||
even if they were suspended before the system sleep began. There are several
|
||||
reasons for this, including:
|
||||
During system resume, the simplest approach is to bring all devices back to full
|
||||
power, even if they had been suspended before the system suspend began. There
|
||||
are several reasons for this, including:
|
||||
|
||||
* The device might need to switch power levels, wake-up settings, etc.
|
||||
|
||||
|
@ -570,18 +585,50 @@ reasons for this, including:
|
|||
* The device might need to be reset.
|
||||
|
||||
* Even though the device was suspended, if its usage counter was > 0 then most
|
||||
likely it would need a run-time resume in the near future anyway.
|
||||
likely it would need a runtime resume in the near future anyway.
|
||||
|
||||
* Always going back to full power is simplest.
|
||||
|
||||
If the device was suspended before the sleep began, then its run-time PM status
|
||||
will have to be updated to reflect the actual post-system sleep status. The way
|
||||
to do this is:
|
||||
If the device had been suspended before the system suspend began and it's
|
||||
brought back to full power during resume, then its runtime PM status will have
|
||||
to be updated to reflect the actual post-system sleep status. The way to do
|
||||
this is:
|
||||
|
||||
pm_runtime_disable(dev);
|
||||
pm_runtime_set_active(dev);
|
||||
pm_runtime_enable(dev);
|
||||
|
||||
The PM core always increments the runtime usage counter before calling the
|
||||
->suspend() callback and decrements it after calling the ->resume() callback.
|
||||
Hence disabling runtime PM temporarily like this will not cause any runtime
|
||||
suspend attempts to be permanently lost. If the usage count goes to zero
|
||||
following the return of the ->resume() callback, the ->runtime_idle() callback
|
||||
will be invoked as usual.
|
||||
|
||||
On some systems, however, system sleep is not entered through a global firmware
|
||||
or hardware operation. Instead, all hardware components are put into low-power
|
||||
states directly by the kernel in a coordinated way. Then, the system sleep
|
||||
state effectively follows from the states the hardware components end up in
|
||||
and the system is woken up from that state by a hardware interrupt or a similar
|
||||
mechanism entirely under the kernel's control. As a result, the kernel never
|
||||
gives control away and the states of all devices during resume are precisely
|
||||
known to it. If that is the case and none of the situations listed above takes
|
||||
place (in particular, if the system is not waking up from hibernation), it may
|
||||
be more efficient to leave the devices that had been suspended before the system
|
||||
suspend began in the suspended state.
|
||||
|
||||
The PM core does its best to reduce the probability of race conditions between
|
||||
the runtime PM and system suspend/resume (and hibernation) callbacks by carrying
|
||||
out the following operations:
|
||||
|
||||
* During system suspend it calls pm_runtime_get_noresume() and
|
||||
pm_runtime_barrier() for every device right before executing the
|
||||
subsystem-level .suspend() callback for it. In addition to that it calls
|
||||
pm_runtime_disable() for every device right after executing the
|
||||
subsystem-level .suspend() callback for it.
|
||||
|
||||
* During system resume it calls pm_runtime_enable() and pm_runtime_put_sync()
|
||||
for every device right before and right after executing the subsystem-level
|
||||
.resume() callback for it, respectively.
|
||||
|
||||
7. Generic subsystem callbacks
|
||||
|
||||
Subsystems may wish to conserve code space by using the set of generic power
|
||||
|
@ -606,40 +653,68 @@ driver/base/power/generic_ops.c:
|
|||
callback provided by its driver and return its result, or return 0 if not
|
||||
defined
|
||||
|
||||
int pm_generic_suspend_noirq(struct device *dev);
|
||||
- if pm_runtime_suspended(dev) returns "false", invoke the ->suspend_noirq()
|
||||
callback provided by the device's driver and return its result, or return
|
||||
0 if not defined
|
||||
|
||||
int pm_generic_resume(struct device *dev);
|
||||
- invoke the ->resume() callback provided by the driver of this device and,
|
||||
if successful, change the device's runtime PM status to 'active'
|
||||
|
||||
int pm_generic_resume_noirq(struct device *dev);
|
||||
- invoke the ->resume_noirq() callback provided by the driver of this device
|
||||
|
||||
int pm_generic_freeze(struct device *dev);
|
||||
- if the device has not been suspended at run time, invoke the ->freeze()
|
||||
callback provided by its driver and return its result, or return 0 if not
|
||||
defined
|
||||
|
||||
int pm_generic_freeze_noirq(struct device *dev);
|
||||
- if pm_runtime_suspended(dev) returns "false", invoke the ->freeze_noirq()
|
||||
callback provided by the device's driver and return its result, or return
|
||||
0 if not defined
|
||||
|
||||
int pm_generic_thaw(struct device *dev);
|
||||
- if the device has not been suspended at run time, invoke the ->thaw()
|
||||
callback provided by its driver and return its result, or return 0 if not
|
||||
defined
|
||||
|
||||
int pm_generic_thaw_noirq(struct device *dev);
|
||||
- if pm_runtime_suspended(dev) returns "false", invoke the ->thaw_noirq()
|
||||
callback provided by the device's driver and return its result, or return
|
||||
0 if not defined
|
||||
|
||||
int pm_generic_poweroff(struct device *dev);
|
||||
- if the device has not been suspended at run time, invoke the ->poweroff()
|
||||
callback provided by its driver and return its result, or return 0 if not
|
||||
defined
|
||||
|
||||
int pm_generic_poweroff_noirq(struct device *dev);
|
||||
- if pm_runtime_suspended(dev) returns "false", run the ->poweroff_noirq()
|
||||
callback provided by the device's driver and return its result, or return
|
||||
0 if not defined
|
||||
|
||||
int pm_generic_restore(struct device *dev);
|
||||
- invoke the ->restore() callback provided by the driver of this device and,
|
||||
if successful, change the device's runtime PM status to 'active'
|
||||
|
||||
int pm_generic_restore_noirq(struct device *dev);
|
||||
- invoke the ->restore_noirq() callback provided by the device's driver
|
||||
|
||||
These functions can be assigned to the ->runtime_idle(), ->runtime_suspend(),
|
||||
->runtime_resume(), ->suspend(), ->resume(), ->freeze(), ->thaw(), ->poweroff(),
|
||||
or ->restore() callback pointers in the subsystem-level dev_pm_ops structures.
|
||||
->runtime_resume(), ->suspend(), ->suspend_noirq(), ->resume(),
|
||||
->resume_noirq(), ->freeze(), ->freeze_noirq(), ->thaw(), ->thaw_noirq(),
|
||||
->poweroff(), ->poweroff_noirq(), ->restore(), ->restore_noirq() callback
|
||||
pointers in the subsystem-level dev_pm_ops structures.
|
||||
|
||||
If a subsystem wishes to use all of them at the same time, it can simply assign
|
||||
the GENERIC_SUBSYS_PM_OPS macro, defined in include/linux/pm.h, to its
|
||||
dev_pm_ops structure pointer.
|
||||
|
||||
Device drivers that wish to use the same function as a system suspend, freeze,
|
||||
poweroff and run-time suspend callback, and similarly for system resume, thaw,
|
||||
restore, and run-time resume, can achieve this with the help of the
|
||||
poweroff and runtime suspend callback, and similarly for system resume, thaw,
|
||||
restore, and runtime resume, can achieve this with the help of the
|
||||
UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its
|
||||
last argument to NULL).
|
||||
|
||||
|
@ -649,7 +724,7 @@ Some "devices" are only logical sub-devices of their parent and cannot be
|
|||
power-managed on their own. (The prototype example is a USB interface. Entire
|
||||
USB devices can go into low-power mode or send wake-up requests, but neither is
|
||||
possible for individual interfaces.) The drivers for these devices have no
|
||||
need of run-time PM callbacks; if the callbacks did exist, ->runtime_suspend()
|
||||
need of runtime PM callbacks; if the callbacks did exist, ->runtime_suspend()
|
||||
and ->runtime_resume() would always return 0 without doing anything else and
|
||||
->runtime_idle() would always call pm_runtime_suspend().
|
||||
|
||||
|
@ -657,7 +732,7 @@ Subsystems can tell the PM core about these devices by calling
|
|||
pm_runtime_no_callbacks(). This should be done after the device structure is
|
||||
initialized and before it is registered (although after device registration is
|
||||
also okay). The routine will set the device's power.no_callbacks flag and
|
||||
prevent the non-debugging run-time PM sysfs attributes from being created.
|
||||
prevent the non-debugging runtime PM sysfs attributes from being created.
|
||||
|
||||
When power.no_callbacks is set, the PM core will not invoke the
|
||||
->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks.
|
||||
|
@ -665,7 +740,7 @@ Instead it will assume that suspends and resumes always succeed and that idle
|
|||
devices should be suspended.
|
||||
|
||||
As a consequence, the PM core will never directly inform the device's subsystem
|
||||
or driver about run-time power changes. Instead, the driver for the device's
|
||||
or driver about runtime power changes. Instead, the driver for the device's
|
||||
parent must take responsibility for telling the device's driver when the
|
||||
parent's power state changes.
|
||||
|
||||
|
@ -676,13 +751,13 @@ A device should be put in a low-power state only when there's some reason to
|
|||
think it will remain in that state for a substantial time. A common heuristic
|
||||
says that a device which hasn't been used for a while is liable to remain
|
||||
unused; following this advice, drivers should not allow devices to be suspended
|
||||
at run-time until they have been inactive for some minimum period. Even when
|
||||
at runtime until they have been inactive for some minimum period. Even when
|
||||
the heuristic ends up being non-optimal, it will still prevent devices from
|
||||
"bouncing" too rapidly between low-power and full-power states.
|
||||
|
||||
The term "autosuspend" is an historical remnant. It doesn't mean that the
|
||||
device is automatically suspended (the subsystem or driver still has to call
|
||||
the appropriate PM routines); rather it means that run-time suspends will
|
||||
the appropriate PM routines); rather it means that runtime suspends will
|
||||
automatically be delayed until the desired period of inactivity has elapsed.
|
||||
|
||||
Inactivity is determined based on the power.last_busy field. Drivers should
|
||||
|
|
|
@ -196,15 +196,20 @@ Support for Augmented rbtrees
|
|||
Augmented rbtree is an rbtree with "some" additional data stored in each node.
|
||||
This data can be used to augment some new functionality to rbtree.
|
||||
Augmented rbtree is an optional feature built on top of basic rbtree
|
||||
infrastructure. rbtree user who wants this feature will have an augment
|
||||
callback function in rb_root initialized.
|
||||
infrastructure. An rbtree user who wants this feature will have to call the
|
||||
augmentation functions with the user provided augmentation callback
|
||||
when inserting and erasing nodes.
|
||||
|
||||
This callback function will be called from rbtree core routines whenever
|
||||
a node has a change in one or both of its children. It is the responsibility
|
||||
of the callback function to recalculate the additional data that is in the
|
||||
rb node using new children information. Note that if this new additional
|
||||
data affects the parent node's additional data, then callback function has
|
||||
to handle it and do the recursive updates.
|
||||
On insertion, the user must call rb_augment_insert() once the new node is in
|
||||
place. This will cause the augmentation function callback to be called for
|
||||
each node between the new node and the root which has been affected by the
|
||||
insertion.
|
||||
|
||||
When erasing a node, the user must call rb_augment_erase_begin() first to
|
||||
retrieve the deepest node on the rebalance path. Then, after erasing the
|
||||
original node, the user must call rb_augment_erase_end() with the deepest
|
||||
node found earlier. This will cause the augmentation function to be called
|
||||
for each affected node between the deepest node and the root.
|
||||
|
||||
|
||||
Interval tree is an example of augmented rb tree. Reference -
|
||||
|
|
|
@ -1,122 +0,0 @@
|
|||
Channel attached Tape device driver
|
||||
|
||||
-----------------------------WARNING-----------------------------------------
|
||||
This driver is considered to be EXPERIMENTAL. Do NOT use it in
|
||||
production environments. Feel free to test it and report problems back to us.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
The LINUX for zSeries tape device driver manages channel attached tape drives
|
||||
which are compatible to IBM 3480 or IBM 3490 magnetic tape subsystems. This
|
||||
includes various models of these devices (for example the 3490E).
|
||||
|
||||
|
||||
Tape driver features
|
||||
|
||||
The device driver supports a maximum of 128 tape devices.
|
||||
No official LINUX device major number is assigned to the zSeries tape device
|
||||
driver. It allocates major numbers dynamically and reports them on system
|
||||
startup.
|
||||
Typically it will get major number 254 for both the character device front-end
|
||||
and the block device front-end.
|
||||
|
||||
The tape device driver needs no kernel parameters. All supported devices
|
||||
present are detected on driver initialization at system startup or module load.
|
||||
The devices detected are ordered by their subchannel numbers. The device with
|
||||
the lowest subchannel number becomes device 0, the next one will be device 1
|
||||
and so on.
|
||||
|
||||
|
||||
Tape character device front-end
|
||||
|
||||
The usual way to read or write to the tape device is through the character
|
||||
device front-end. The zSeries tape device driver provides two character devices
|
||||
for each physical device -- the first of these will rewind automatically when
|
||||
it is closed, the second will not rewind automatically.
|
||||
|
||||
The character device nodes are named /dev/rtibm0 (rewinding) and /dev/ntibm0
|
||||
(non-rewinding) for the first device, /dev/rtibm1 and /dev/ntibm1 for the
|
||||
second, and so on.
|
||||
|
||||
The character device front-end can be used as any other LINUX tape device. You
|
||||
can write to it and read from it using LINUX facilities such as GNU tar. The
|
||||
tool mt can be used to perform control operations, such as rewinding the tape
|
||||
or skipping a file.
|
||||
|
||||
Most LINUX tape software should work with either tape character device.
|
||||
|
||||
|
||||
Tape block device front-end
|
||||
|
||||
The tape device may also be accessed as a block device in read-only mode.
|
||||
This could be used for software installation in the same way as it is used with
|
||||
other operating systems on the zSeries platform (and most LINUX
|
||||
distributions are shipped on compact disk using ISO9660 filesystems).
|
||||
|
||||
One block device node is provided for each physical device. These are named
|
||||
/dev/btibm0 for the first device, /dev/btibm1 for the second and so on.
|
||||
You should only use the ISO9660 filesystem on LINUX for zSeries tapes because
|
||||
the physical tape devices cannot perform fast seeks and the ISO9660 system is
|
||||
optimized for this situation.
|
||||
|
||||
|
||||
Tape block device example
|
||||
|
||||
In this example a tape with an ISO9660 filesystem is created using the first
|
||||
tape device. ISO9660 filesystem support must be built into your system kernel
|
||||
for this.
|
||||
The mt command is used to issue tape commands and the mkisofs command to
|
||||
create an ISO9660 filesystem:
|
||||
|
||||
- create a LINUX directory (somedir) with the contents of the filesystem
|
||||
mkdir somedir
|
||||
cp contents somedir
|
||||
|
||||
- insert a tape
|
||||
|
||||
- ensure the tape is at the beginning
|
||||
mt -f /dev/ntibm0 rewind
|
||||
|
||||
- set the blocksize of the character driver. The blocksize 2048 bytes
|
||||
is commonly used on ISO9660 CD-Roms
|
||||
mt -f /dev/ntibm0 setblk 2048
|
||||
|
||||
- write the filesystem to the character device driver
|
||||
mkisofs -o /dev/ntibm0 somedir
|
||||
|
||||
- rewind the tape again
|
||||
mt -f /dev/ntibm0 rewind
|
||||
|
||||
- Now you can mount your new filesystem as a block device:
|
||||
mount -t iso9660 -o ro,block=2048 /dev/btibm0 /mnt
|
||||
|
||||
TODO List
|
||||
|
||||
- Driver has to be stabilized still
|
||||
|
||||
BUGS
|
||||
|
||||
This driver is considered BETA, which means some weaknesses may still
|
||||
be in it.
|
||||
If an error occurs which cannot be handled by the code you will get a
|
||||
sense-data dump. In that case please do the following:
|
||||
|
||||
1. set the tape driver debug level to maximum:
|
||||
echo 6 >/proc/s390dbf/tape/level
|
||||
|
||||
2. re-perform the actions which produced the bug. (Hopefully the bug will
|
||||
reappear.)
|
||||
|
||||
3. get a snapshot from the debug-feature:
|
||||
cat /proc/s390dbf/tape/hex_ascii >somefile
|
||||
|
||||
4. Now put the snapshot together with a detailed description of the situation
|
||||
that led to the bug:
|
||||
- Which tool did you use?
|
||||
- Which hardware do you have?
|
||||
- Was your tape unit online?
|
||||
- Is it a shared tape unit?
|
||||
|
||||
5. Send an email with your bug report to:
|
||||
mailto:Linux390@de.ibm.com
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ Your cpu_idle routines need to obey the following rules:
|
|||
barrier issued (followed by a test of need_resched with
|
||||
interrupts disabled, as explained in 3).
|
||||
|
||||
arch/i386/kernel/process.c has examples of both polling and
|
||||
arch/x86/kernel/process.c has examples of both polling and
|
||||
sleeping idle functions.
|
||||
|
||||
|
||||
|
|
|
@ -553,7 +553,7 @@ replacing "/usr/src" with wherever you keep your Linux kernel source tree:
|
|||
make config
|
||||
make zImage
|
||||
|
||||
Then install "arch/i386/boot/zImage" as your standard kernel, run lilo if
|
||||
Then install "arch/x86/boot/zImage" as your standard kernel, run lilo if
|
||||
appropriate, and reboot.
|
||||
|
||||
|
||||
|
|
|
@ -87,7 +87,7 @@ c) Set address on ISA cards then:
|
|||
edit /usr/src/linux/drivers/char/ip2.c
|
||||
(Optional - may be specified on kernel command line now)
|
||||
d) Run "make zImage" or whatever target you prefer.
|
||||
e) mv /usr/src/linux/arch/i386/boot/zImage to /boot.
|
||||
e) mv /usr/src/linux/arch/x86/boot/zImage to /boot.
|
||||
f) Add new config for this kernel into /etc/lilo.conf, run "lilo"
|
||||
or copy to a floppy disk and boot from that floppy disk.
|
||||
g) Reboot using this kernel
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
This file explains the codec-specific mixer controls.
|
||||
|
||||
Realtek codecs
|
||||
--------------
|
||||
|
||||
* Channel Mode
|
||||
This is an enum control to change the surround-channel setup,
|
||||
and appears only when the surround channels are available.
|
||||
It gives the number of channels to be used, "2ch", "4ch", "6ch",
|
||||
and "8ch". According to the configuration, this also controls the
|
||||
jack-retasking of multi-I/O jacks.
|
||||
|
||||
* Auto-Mute Mode
|
||||
This is an enum control to change the auto-mute behavior of the
|
||||
headphone and line-out jacks. If built-in speakers and headphone
|
||||
and/or line-out jacks are available on a machine, this control
|
||||
appears.
|
||||
When there are only either headphones or line-out jacks, it gives
|
||||
"Disabled" and "Enabled" state. When enabled, the speaker is muted
|
||||
automatically when a jack is plugged.
|
||||
|
||||
When both headphone and line-out jacks are present, it gives
|
||||
"Disabled", "Speaker Only" and "Line-Out+Speaker". When
|
||||
speaker-only is chosen, plugging into a headphone or a line-out jack
|
||||
mutes the speakers, but not line-outs. When line-out+speaker is
|
||||
selected, plugging to a headphone jack mutes both speakers and
|
||||
line-outs.
|
||||
|
||||
|
||||
IDT/Sigmatel codecs
|
||||
-------------------
|
||||
|
||||
* Analog Loopback
|
||||
This control enables/disables the analog-loopback circuit. This
|
||||
appears only when "loopback" is set to true in a codec hint
|
||||
(see HD-Audio.txt). Note that on some codecs the analog-loopback
|
||||
and the normal PCM playback are exclusive, i.e. when this is on, you
|
||||
won't hear any PCM stream.
|
||||
|
||||
* Swap Center/LFE
|
||||
Swaps the center and LFE channel order. Normally, the left
|
||||
corresponds to the center and the right to the LFE. When this is
|
||||
ON, the left to the LFE and the right to the center.
|
||||
|
||||
* Headphone as Line Out
|
||||
When this control is ON, treat the headphone jacks as line-out
|
||||
jacks. That is, the headphone won't auto-mute the other line-outs,
|
||||
and no HP-amp is set to the pins.
|
||||
|
||||
* Mic Jack Mode, Line Jack Mode, etc
|
||||
These enum controls set the direction and the bias of the input jack
|
||||
pins. Depending on the jack type, it can be set as "Mic In" and "Line
|
||||
In", for determining the input bias, or it can be set to "Line Out"
|
||||
when the pin is a multi-I/O jack for surround channels.
|
||||
|
||||
|
||||
VIA codecs
|
||||
----------
|
||||
|
||||
* Smart 5.1
|
||||
An enum control to re-task the multi-I/O jacks for surround outputs.
|
||||
When it's ON, the corresponding input jacks (usually a line-in and a
|
||||
mic-in) are switched as the surround and the CLFE output jacks.
|
||||
|
||||
* Independent HP
|
||||
When this enum control is enabled, the headphone output is routed
|
||||
from an individual stream (the third PCM such as hw:0,2) instead of
|
||||
the primary stream. In the case the headphone DAC is shared with a
|
||||
side or a CLFE-channel DAC, the DAC is switched to the headphone
|
||||
automatically.
|
||||
|
||||
* Loopback Mixing
|
||||
An enum control to determine whether the analog-loopback route is
|
||||
enabled or not. When it's enabled, the analog-loopback is mixed to
|
||||
the front-channel. Also, the same route is used for the headphone
|
||||
and speaker outputs. As a side-effect, when this mode is set, the
|
||||
individual volume controls will no longer be available for
|
||||
headphones and speakers because there is only one DAC connected to a
|
||||
mixer widget.
|
||||
|
||||
* Dynamic Power-Control
|
||||
This control determines whether the dynamic power-control per jack
|
||||
detection is enabled or not. When enabled, the widget power states
|
||||
(D0/D3) are changed dynamically depending on the jack plugging
|
||||
state to reduce power consumption. However, if your system
|
||||
doesn't provide a proper jack-detection, this won't work; in such a
|
||||
case, turn this control OFF.
|
||||
|
||||
* Jack Detect
|
||||
This control is provided only for VT1708 codec which gives no proper
|
||||
unsolicited event per jack plug. When this is on, the driver polls
|
||||
the jack detection so that the headphone auto-mute can work, while
|
||||
turning this off would reduce the power consumption.
|
||||
|
||||
|
||||
Conexant codecs
|
||||
---------------
|
||||
|
||||
* Auto-Mute Mode
|
||||
See Realtek codecs.
|
|
@ -88,6 +88,16 @@ static void __init ts72xx_init_machine(void)
|
|||
ARRAY_SIZE(ts72xx_spi_devices));
|
||||
}
|
||||
|
||||
The driver can use DMA for the transfers also. In this case ts72xx_spi_info
|
||||
becomes:
|
||||
|
||||
static struct ep93xx_spi_info ts72xx_spi_info = {
|
||||
.num_chipselect = ARRAY_SIZE(ts72xx_spi_devices),
|
||||
.use_dma = true,
|
||||
};
|
||||
|
||||
Note that CONFIG_EP93XX_DMA should be enabled as well.
|
||||
|
||||
Thanks to
|
||||
=========
|
||||
Martin Guy, H. Hartley Sweeten and others who helped me during development of
|
||||
|
|
|
@ -22,15 +22,11 @@ Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a
|
|||
found in include/linux/spi/pxa2xx_spi.h:
|
||||
|
||||
struct pxa2xx_spi_master {
|
||||
enum pxa_ssp_type ssp_type;
|
||||
u32 clock_enable;
|
||||
u16 num_chipselect;
|
||||
u8 enable_dma;
|
||||
};
|
||||
|
||||
The "pxa2xx_spi_master.ssp_type" field must have a value between 1 and 3 and
|
||||
informs the driver which features a particular SSP supports.
|
||||
|
||||
The "pxa2xx_spi_master.clock_enable" field is used to enable/disable the
|
||||
corresponding SSP peripheral block in the "Clock Enable Register (CKEN"). See
|
||||
the "PXA2xx Developer Manual" section "Clocks and Power Management".
|
||||
|
@ -61,7 +57,6 @@ static struct resource pxa_spi_nssp_resources[] = {
|
|||
};
|
||||
|
||||
static struct pxa2xx_spi_master pxa_nssp_master_info = {
|
||||
.ssp_type = PXA25x_NSSP, /* Type of SSP */
|
||||
.clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */
|
||||
.num_chipselect = 1, /* Matches the number of chips attached to NSSP */
|
||||
.enable_dma = 1, /* Enables NSSP DMA */
|
||||
|
|
|
@ -13,18 +13,8 @@ static DEFINE_SPINLOCK(xxx_lock);
|
|||
The above is always safe. It will disable interrupts _locally_, but the
|
||||
spinlock itself will guarantee the global lock, so it will guarantee that
|
||||
there is only one thread-of-control within the region(s) protected by that
|
||||
lock. This works well even under UP. The above sequence under UP
|
||||
essentially is just the same as doing
|
||||
|
||||
unsigned long flags;
|
||||
|
||||
save_flags(flags); cli();
|
||||
... critical section ...
|
||||
restore_flags(flags);
|
||||
|
||||
so the code does _not_ need to worry about UP vs SMP issues: the spinlocks
|
||||
work correctly under both (and spinlocks are actually more efficient on
|
||||
architectures that allow doing the "save_flags + cli" in one operation).
|
||||
lock. This works well even under UP, so the code does _not_ need to
|
||||
worry about UP vs SMP issues: the spinlocks work correctly under both.
|
||||
|
||||
NOTE! Implications of spin_locks for memory are further described in:
|
||||
|
||||
|
@ -36,27 +26,7 @@ The above is usually pretty simple (you usually need and want only one
|
|||
spinlock for most things - using more than one spinlock can make things a
|
||||
lot more complex and even slower and is usually worth it only for
|
||||
sequences that you _know_ need to be split up: avoid it at all cost if you
|
||||
aren't sure). HOWEVER, it _does_ mean that if you have some code that does
|
||||
|
||||
cli();
|
||||
.. critical section ..
|
||||
sti();
|
||||
|
||||
and another sequence that does
|
||||
|
||||
spin_lock_irqsave(flags);
|
||||
.. critical section ..
|
||||
spin_unlock_irqrestore(flags);
|
||||
|
||||
then they are NOT mutually exclusive, and the critical regions can happen
|
||||
at the same time on two different CPU's. That's fine per se, but the
|
||||
critical regions had better be critical for different things (ie they
|
||||
can't stomp on each other).
|
||||
|
||||
The above is a problem mainly if you end up mixing code - for example the
|
||||
routines in ll_rw_block() tend to use cli/sti to protect the atomicity of
|
||||
their actions, and if a driver uses spinlocks instead then you should
|
||||
think about issues like the above.
|
||||
aren't sure).
|
||||
|
||||
This is really the only really hard part about spinlocks: once you start
|
||||
using spinlocks they tend to expand to areas you might not have noticed
|
||||
|
@ -120,11 +90,10 @@ Lesson 3: spinlocks revisited.
|
|||
|
||||
The single spin-lock primitives above are by no means the only ones. They
|
||||
are the most safe ones, and the ones that work under all circumstances,
|
||||
but partly _because_ they are safe they are also fairly slow. They are
|
||||
much faster than a generic global cli/sti pair, but slower than they'd
|
||||
need to be, because they do have to disable interrupts (which is just a
|
||||
single instruction on a x86, but it's an expensive one - and on other
|
||||
architectures it can be worse).
|
||||
but partly _because_ they are safe they are also fairly slow. They are slower
|
||||
than they'd need to be, because they do have to disable interrupts
|
||||
(which is just a single instruction on a x86, but it's an expensive one -
|
||||
and on other architectures it can be worse).
|
||||
|
||||
If you have a case where you have to protect a data structure across
|
||||
several CPU's and you want to use spinlocks you can potentially use
|
||||
|
|
|
@ -17,23 +17,21 @@ before actually making adjustments.
|
|||
|
||||
Currently, these files might (depending on your configuration)
|
||||
show up in /proc/sys/kernel:
|
||||
- acpi_video_flags
|
||||
|
||||
- acct
|
||||
- acpi_video_flags
|
||||
- auto_msgmni
|
||||
- bootloader_type [ X86 only ]
|
||||
- bootloader_version [ X86 only ]
|
||||
- callhome [ S390 only ]
|
||||
- auto_msgmni
|
||||
- core_pattern
|
||||
- core_pipe_limit
|
||||
- core_uses_pid
|
||||
- ctrl-alt-del
|
||||
- dentry-state
|
||||
- dmesg_restrict
|
||||
- domainname
|
||||
- hostname
|
||||
- hotplug
|
||||
- java-appletviewer [ binfmt_java, obsolete ]
|
||||
- java-interpreter [ binfmt_java, obsolete ]
|
||||
- kptr_restrict
|
||||
- kstack_depth_to_print [ X86 only ]
|
||||
- l2cr [ PPC only ]
|
||||
|
@ -48,10 +46,14 @@ show up in /proc/sys/kernel:
|
|||
- overflowgid
|
||||
- overflowuid
|
||||
- panic
|
||||
- panic_on_oops
|
||||
- panic_on_unrecovered_nmi
|
||||
- pid_max
|
||||
- powersave-nap [ PPC only ]
|
||||
- panic_on_unrecovered_nmi
|
||||
- printk
|
||||
- printk_delay
|
||||
- printk_ratelimit
|
||||
- printk_ratelimit_burst
|
||||
- randomize_va_space
|
||||
- real-root-dev ==> Documentation/initrd.txt
|
||||
- reboot-cmd [ SPARC only ]
|
||||
|
@ -62,6 +64,7 @@ show up in /proc/sys/kernel:
|
|||
- shmall
|
||||
- shmmax [ sysv ipc ]
|
||||
- shmmni
|
||||
- softlockup_thresh
|
||||
- stop-a [ SPARC only ]
|
||||
- sysrq ==> Documentation/sysrq.txt
|
||||
- tainted
|
||||
|
@ -71,15 +74,6 @@ show up in /proc/sys/kernel:
|
|||
|
||||
==============================================================
|
||||
|
||||
acpi_video_flags:
|
||||
|
||||
flags
|
||||
|
||||
See Doc*/kernel/power/video.txt, it allows mode of video boot to be
|
||||
set during run time.
|
||||
|
||||
==============================================================
|
||||
|
||||
acct:
|
||||
|
||||
highwater lowwater frequency
|
||||
|
@ -95,6 +89,25 @@ That is, suspend accounting if there left <= 2% free; resume it
|
|||
if we got >=4%; consider information about amount of free space
|
||||
valid for 30 seconds.
|
||||
|
||||
==============================================================
|
||||
|
||||
acpi_video_flags:
|
||||
|
||||
flags
|
||||
|
||||
See Doc*/kernel/power/video.txt, it allows mode of video boot to be
|
||||
set during run time.
|
||||
|
||||
==============================================================
|
||||
|
||||
auto_msgmni:
|
||||
|
||||
Enables/Disables automatic recomputing of msgmni upon memory add/remove
|
||||
or upon ipc namespace creation/removal (see the msgmni description
|
||||
above). Echoing "1" into this file enables msgmni automatic recomputing.
|
||||
Echoing "0" turns it off. auto_msgmni default value is 1.
|
||||
|
||||
|
||||
==============================================================
|
||||
|
||||
bootloader_type:
|
||||
|
@ -172,22 +185,24 @@ core_pattern is used to specify a core dumpfile pattern name.
|
|||
|
||||
core_pipe_limit:
|
||||
|
||||
This sysctl is only applicable when core_pattern is configured to pipe core
|
||||
files to a user space helper (when the first character of core_pattern is a '|',
|
||||
see above). When collecting cores via a pipe to an application, it is
|
||||
occasionally useful for the collecting application to gather data about the
|
||||
crashing process from its /proc/pid directory. In order to do this safely, the
|
||||
kernel must wait for the collecting process to exit, so as not to remove the
|
||||
crashing processes proc files prematurely. This in turn creates the possibility
|
||||
that a misbehaving userspace collecting process can block the reaping of a
|
||||
crashed process simply by never exiting. This sysctl defends against that. It
|
||||
defines how many concurrent crashing processes may be piped to user space
|
||||
applications in parallel. If this value is exceeded, then those crashing
|
||||
processes above that value are noted via the kernel log and their cores are
|
||||
skipped. 0 is a special value, indicating that unlimited processes may be
|
||||
captured in parallel, but that no waiting will take place (i.e. the collecting
|
||||
process is not guaranteed access to /proc/<crashing pid>/). This value defaults
|
||||
to 0.
|
||||
This sysctl is only applicable when core_pattern is configured to pipe
|
||||
core files to a user space helper (when the first character of
|
||||
core_pattern is a '|', see above). When collecting cores via a pipe
|
||||
to an application, it is occasionally useful for the collecting
|
||||
application to gather data about the crashing process from its
|
||||
/proc/pid directory. In order to do this safely, the kernel must wait
|
||||
for the collecting process to exit, so as not to remove the crashing
|
||||
processes proc files prematurely. This in turn creates the
|
||||
possibility that a misbehaving userspace collecting process can block
|
||||
the reaping of a crashed process simply by never exiting. This sysctl
|
||||
defends against that. It defines how many concurrent crashing
|
||||
processes may be piped to user space applications in parallel. If
|
||||
this value is exceeded, then those crashing processes above that value
|
||||
are noted via the kernel log and their cores are skipped. 0 is a
|
||||
special value, indicating that unlimited processes may be captured in
|
||||
parallel, but that no waiting will take place (i.e. the collecting
|
||||
process is not guaranteed access to /proc/<crashing pid>/). This
|
||||
value defaults to 0.
|
||||
|
||||
==============================================================
|
||||
|
||||
|
@ -218,14 +233,14 @@ to decide what to do with it.
|
|||
|
||||
dmesg_restrict:
|
||||
|
||||
This toggle indicates whether unprivileged users are prevented from using
|
||||
dmesg(8) to view messages from the kernel's log buffer. When
|
||||
dmesg_restrict is set to (0) there are no restrictions. When
|
||||
This toggle indicates whether unprivileged users are prevented
|
||||
from using dmesg(8) to view messages from the kernel's log buffer.
|
||||
When dmesg_restrict is set to (0) there are no restrictions. When
|
||||
dmesg_restrict is set to (1), users must have CAP_SYSLOG to use
|
||||
dmesg(8).
|
||||
|
||||
The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the default
|
||||
value of dmesg_restrict.
|
||||
The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the
|
||||
default value of dmesg_restrict.
|
||||
|
||||
==============================================================
|
||||
|
||||
|
@ -256,13 +271,6 @@ Default value is "/sbin/hotplug".
|
|||
|
||||
==============================================================
|
||||
|
||||
l2cr: (PPC only)
|
||||
|
||||
This flag controls the L2 cache of G3 processor boards. If
|
||||
0, the cache is disabled. Enabled if nonzero.
|
||||
|
||||
==============================================================
|
||||
|
||||
kptr_restrict:
|
||||
|
||||
This toggle indicates whether restrictions are placed on
|
||||
|
@ -283,6 +291,13 @@ kernel stack.
|
|||
|
||||
==============================================================
|
||||
|
||||
l2cr: (PPC only)
|
||||
|
||||
This flag controls the L2 cache of G3 processor boards. If
|
||||
0, the cache is disabled. Enabled if nonzero.
|
||||
|
||||
==============================================================
|
||||
|
||||
modules_disabled:
|
||||
|
||||
A toggle value indicating if modules are allowed to be loaded
|
||||
|
@ -293,6 +308,21 @@ to false.
|
|||
|
||||
==============================================================
|
||||
|
||||
nmi_watchdog:
|
||||
|
||||
Enables/Disables the NMI watchdog on x86 systems. When the value is
|
||||
non-zero the NMI watchdog is enabled and will continuously test all
|
||||
online cpus to determine whether or not they are still functioning
|
||||
properly. Currently, passing "nmi_watchdog=" parameter at boot time is
|
||||
required for this function to work.
|
||||
|
||||
If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel
|
||||
parameter), the NMI watchdog shares registers with oprofile. By
|
||||
disabling the NMI watchdog, oprofile may have more registers to
|
||||
utilize.
|
||||
|
||||
==============================================================
|
||||
|
||||
osrelease, ostype & version:
|
||||
|
||||
# cat osrelease
|
||||
|
@ -312,10 +342,10 @@ The only way to tune these values is to rebuild the kernel :-)
|
|||
|
||||
overflowgid & overflowuid:
|
||||
|
||||
if your architecture did not always support 32-bit UIDs (i.e. arm, i386,
|
||||
m68k, sh, and sparc32), a fixed UID and GID will be returned to
|
||||
applications that use the old 16-bit UID/GID system calls, if the actual
|
||||
UID or GID would exceed 65535.
|
||||
if your architecture did not always support 32-bit UIDs (i.e. arm,
|
||||
i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to
|
||||
applications that use the old 16-bit UID/GID system calls, if the
|
||||
actual UID or GID would exceed 65535.
|
||||
|
||||
These sysctls allow you to change the value of the fixed UID and GID.
|
||||
The default is 65534.
|
||||
|
@ -324,9 +354,22 @@ The default is 65534.
|
|||
|
||||
panic:
|
||||
|
||||
The value in this file represents the number of seconds the
|
||||
kernel waits before rebooting on a panic. When you use the
|
||||
software watchdog, the recommended setting is 60.
|
||||
The value in this file represents the number of seconds the kernel
|
||||
waits before rebooting on a panic. When you use the software watchdog,
|
||||
the recommended setting is 60.
|
||||
|
||||
==============================================================
|
||||
|
||||
panic_on_unrecovered_nmi:
|
||||
|
||||
The default Linux behaviour on an NMI of either memory or unknown is
|
||||
to continue operation. For many environments such as scientific
|
||||
computing it is preferable that the box is taken out and the error
|
||||
dealt with than an uncorrected parity/ECC error get propagated.
|
||||
|
||||
A small number of systems do generate NMI's for bizarre random reasons
|
||||
such as power management so the default is off. That sysctl works like
|
||||
the existing panic controls already in that directory.
|
||||
|
||||
==============================================================
|
||||
|
||||
|
@ -376,6 +419,14 @@ the different loglevels.
|
|||
|
||||
==============================================================
|
||||
|
||||
printk_delay:
|
||||
|
||||
Delay each printk message in printk_delay milliseconds
|
||||
|
||||
Value from 0 - 10000 is allowed.
|
||||
|
||||
==============================================================
|
||||
|
||||
printk_ratelimit:
|
||||
|
||||
Some warning messages are rate limited. printk_ratelimit specifies
|
||||
|
@ -395,15 +446,7 @@ send before ratelimiting kicks in.
|
|||
|
||||
==============================================================
|
||||
|
||||
printk_delay:
|
||||
|
||||
Delay each printk message in printk_delay milliseconds
|
||||
|
||||
Value from 0 - 10000 is allowed.
|
||||
|
||||
==============================================================
|
||||
|
||||
randomize-va-space:
|
||||
randomize_va_space:
|
||||
|
||||
This option can be used to select the type of process address
|
||||
space randomization that is used in the system, for architectures
|
||||
|
@ -466,11 +509,11 @@ are doing anyway :)
|
|||
|
||||
==============================================================
|
||||
|
||||
shmmax:
|
||||
shmmax:
|
||||
|
||||
This value can be used to query and set the run time limit
|
||||
on the maximum shared memory segment size that can be created.
|
||||
Shared memory segments up to 1Gb are now supported in the
|
||||
Shared memory segments up to 1Gb are now supported in the
|
||||
kernel. This value defaults to SHMMAX.
|
||||
|
||||
==============================================================
|
||||
|
@ -484,7 +527,7 @@ tunable to zero will disable the softlockup detection altogether.
|
|||
|
||||
==============================================================
|
||||
|
||||
tainted:
|
||||
tainted:
|
||||
|
||||
Non-zero if the kernel has been tainted. Numeric values, which
|
||||
can be ORed together:
|
||||
|
@ -509,49 +552,11 @@ can be ORed together:
|
|||
|
||||
==============================================================
|
||||
|
||||
auto_msgmni:
|
||||
|
||||
Enables/Disables automatic recomputing of msgmni upon memory add/remove or
|
||||
upon ipc namespace creation/removal (see the msgmni description above).
|
||||
Echoing "1" into this file enables msgmni automatic recomputing.
|
||||
Echoing "0" turns it off.
|
||||
auto_msgmni default value is 1.
|
||||
|
||||
==============================================================
|
||||
|
||||
nmi_watchdog:
|
||||
|
||||
Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
|
||||
the NMI watchdog is enabled and will continuously test all online cpus to
|
||||
determine whether or not they are still functioning properly. Currently,
|
||||
passing "nmi_watchdog=" parameter at boot time is required for this function
|
||||
to work.
|
||||
|
||||
If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the
|
||||
NMI watchdog shares registers with oprofile. By disabling the NMI watchdog,
|
||||
oprofile may have more registers to utilize.
|
||||
|
||||
==============================================================
|
||||
|
||||
unknown_nmi_panic:
|
||||
|
||||
The value in this file affects behavior of handling NMI. When the value is
|
||||
non-zero, unknown NMI is trapped and then panic occurs. At that time, kernel
|
||||
debugging information is displayed on console.
|
||||
|
||||
NMI switch that most IA32 servers have fires unknown NMI up, for example.
|
||||
If a system hangs up, try pressing the NMI switch.
|
||||
|
||||
==============================================================
|
||||
|
||||
panic_on_unrecovered_nmi:
|
||||
|
||||
The default Linux behaviour on an NMI of either memory or unknown is to continue
|
||||
operation. For many environments such as scientific computing it is preferable
|
||||
that the box is taken out and the error dealt with than an uncorrected
|
||||
parity/ECC error get propogated.
|
||||
|
||||
A small number of systems do generate NMI's for bizarre random reasons such as
|
||||
power management so the default is off. That sysctl works like the existing
|
||||
panic controls already in that directory.
|
||||
The value in this file affects behavior of handling NMI. When the
|
||||
value is non-zero, unknown NMI is trapped and then panic occurs. At
|
||||
that time, kernel debugging information is displayed on console.
|
||||
|
||||
NMI switch that most IA32 servers have fires unknown NMI up, for
|
||||
example. If a system hangs up, try pressing the NMI switch.
|
||||
|
|
|
@ -22,14 +22,15 @@ current_tracer. Instead of that, add probe points via
|
|||
|
||||
Synopsis of kprobe_events
|
||||
-------------------------
|
||||
p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
|
||||
r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
|
||||
p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
|
||||
r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
|
||||
-:[GRP/]EVENT : Clear a probe
|
||||
|
||||
GRP : Group name. If omitted, use "kprobes" for it.
|
||||
EVENT : Event name. If omitted, the event name is generated
|
||||
based on SYMBOL+offs or MEMADDR.
|
||||
SYMBOL[+offs] : Symbol+offset where the probe is inserted.
|
||||
based on SYM+offs or MEMADDR.
|
||||
MOD : Module name which has given SYM.
|
||||
SYM[+offs] : Symbol+offset where the probe is inserted.
|
||||
MEMADDR : Address where the probe is inserted.
|
||||
|
||||
FETCHARGS : Arguments. Each probe can have up to 128 args.
|
||||
|
|
|
@ -0,0 +1,256 @@
|
|||
/*
|
||||
* parse_vdso.c: Linux reference vDSO parser
|
||||
* Written by Andrew Lutomirski, 2011.
|
||||
*
|
||||
* This code is meant to be linked in to various programs that run on Linux.
|
||||
* As such, it is available with as few restrictions as possible. This file
|
||||
* is licensed under the Creative Commons Zero License, version 1.0,
|
||||
* available at http://creativecommons.org/publicdomain/zero/1.0/legalcode
|
||||
*
|
||||
* The vDSO is a regular ELF DSO that the kernel maps into user space when
|
||||
* it starts a program. It works equally well in statically and dynamically
|
||||
* linked binaries.
|
||||
*
|
||||
* This code is tested on x86_64. In principle it should work on any 64-bit
|
||||
* architecture that has a vDSO.
|
||||
*/
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <elf.h>
|
||||
|
||||
/*
|
||||
* To use this vDSO parser, first call one of the vdso_init_* functions.
|
||||
* If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
|
||||
* to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv.
|
||||
* Then call vdso_sym for each symbol you want. For example, to look up
|
||||
* gettimeofday on x86_64, use:
|
||||
*
|
||||
* <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
|
||||
* or
|
||||
* <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
|
||||
*
|
||||
* vdso_sym will return 0 if the symbol doesn't exist or if the init function
|
||||
* failed or was not called. vdso_sym is a little slow, so its return value
|
||||
* should be cached.
|
||||
*
|
||||
* vdso_sym is threadsafe; the init functions are not.
|
||||
*
|
||||
* These are the prototypes:
|
||||
*/
|
||||
extern void vdso_init_from_auxv(void *auxv);
|
||||
extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
|
||||
extern void *vdso_sym(const char *version, const char *name);
|
||||
|
||||
|
||||
/* And here's the code. */
|
||||
|
||||
#ifndef __x86_64__
|
||||
# error Not yet ported to non-x86_64 architectures
|
||||
#endif
|
||||
|
||||
/*
 * Parser state for the single vDSO image of this process.  Everything
 * is filled in by vdso_init_from_sysinfo_ehdr(); the other fields are
 * only meaningful while "valid" is true.
 */
static struct vdso_info {
	bool valid;			/* set once parsing succeeded */

	/* Load information */
	uintptr_t load_addr;		/* base address handed to the init function */
	uintptr_t load_offset;		/* load_addr - recorded vaddr */

	/* Symbol table */
	Elf64_Sym *symtab;		/* from DT_SYMTAB */
	const char *symstrings;		/* from DT_STRTAB */
	Elf64_Word *bucket;		/* DT_HASH bucket array */
	Elf64_Word *chain;		/* DT_HASH chain array */
	Elf64_Word nbucket;		/* number of entries in bucket[] */
	Elf64_Word nchain;		/* number of entries in chain[] */

	/* Version table */
	Elf64_Versym *versym;		/* from DT_VERSYM; cleared if there is no verdef */
	Elf64_Verdef *verdef;		/* from DT_VERDEF */
} vdso_info;
|
||||
|
||||
/*
 * System V ELF symbol hash, following the ELF specification.
 *
 * name: NUL-terminated byte string to hash.
 *
 * The arithmetic is done in 32 bits.  With a 64-bit unsigned long the
 * textbook version can carry into bit 32 when adding a byte, and that
 * bit is never cleared by the 0xf0000000 mask, so the result would no
 * longer be a valid 32-bit ELF hash.
 *
 * Returns the hash value (always below 2^28).
 */
static unsigned long elf_hash(const unsigned char *name)
{
	uint32_t h = 0;

	while (*name) {
		uint32_t g;

		h = (h << 4) + *name++;
		g = h & 0xf0000000;
		if (g)
			h ^= g >> 24;	/* fold the top nibble back in */
		h &= ~g;
	}
	return h;
}
|
||||
|
||||
void vdso_init_from_sysinfo_ehdr(uintptr_t base)
|
||||
{
|
||||
size_t i;
|
||||
bool found_vaddr = false;
|
||||
|
||||
vdso_info.valid = false;
|
||||
|
||||
vdso_info.load_addr = base;
|
||||
|
||||
Elf64_Ehdr *hdr = (Elf64_Ehdr*)base;
|
||||
Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff);
|
||||
Elf64_Dyn *dyn = 0;
|
||||
|
||||
/*
|
||||
* We need two things from the segment table: the load offset
|
||||
* and the dynamic table.
|
||||
*/
|
||||
for (i = 0; i < hdr->e_phnum; i++)
|
||||
{
|
||||
if (pt[i].p_type == PT_LOAD && !found_vaddr) {
|
||||
found_vaddr = true;
|
||||
vdso_info.load_offset = base
|
||||
+ (uintptr_t)pt[i].p_offset
|
||||
- (uintptr_t)pt[i].p_vaddr;
|
||||
} else if (pt[i].p_type == PT_DYNAMIC) {
|
||||
dyn = (Elf64_Dyn*)(base + pt[i].p_offset);
|
||||
}
|
||||
}
|
||||
|
||||
if (!found_vaddr || !dyn)
|
||||
return; /* Failed */
|
||||
|
||||
/*
|
||||
* Fish out the useful bits of the dynamic table.
|
||||
*/
|
||||
Elf64_Word *hash = 0;
|
||||
vdso_info.symstrings = 0;
|
||||
vdso_info.symtab = 0;
|
||||
vdso_info.versym = 0;
|
||||
vdso_info.verdef = 0;
|
||||
for (i = 0; dyn[i].d_tag != DT_NULL; i++) {
|
||||
switch (dyn[i].d_tag) {
|
||||
case DT_STRTAB:
|
||||
vdso_info.symstrings = (const char *)
|
||||
((uintptr_t)dyn[i].d_un.d_ptr
|
||||
+ vdso_info.load_offset);
|
||||
break;
|
||||
case DT_SYMTAB:
|
||||
vdso_info.symtab = (Elf64_Sym *)
|
||||
((uintptr_t)dyn[i].d_un.d_ptr
|
||||
+ vdso_info.load_offset);
|
||||
break;
|
||||
case DT_HASH:
|
||||
hash = (Elf64_Word *)
|
||||
((uintptr_t)dyn[i].d_un.d_ptr
|
||||
+ vdso_info.load_offset);
|
||||
break;
|
||||
case DT_VERSYM:
|
||||
vdso_info.versym = (Elf64_Versym *)
|
||||
((uintptr_t)dyn[i].d_un.d_ptr
|
||||
+ vdso_info.load_offset);
|
||||
break;
|
||||
case DT_VERDEF:
|
||||
vdso_info.verdef = (Elf64_Verdef *)
|
||||
((uintptr_t)dyn[i].d_un.d_ptr
|
||||
+ vdso_info.load_offset);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
|
||||
return; /* Failed */
|
||||
|
||||
if (!vdso_info.verdef)
|
||||
vdso_info.versym = 0;
|
||||
|
||||
/* Parse the hash table header. */
|
||||
vdso_info.nbucket = hash[0];
|
||||
vdso_info.nchain = hash[1];
|
||||
vdso_info.bucket = &hash[2];
|
||||
vdso_info.chain = &hash[vdso_info.nbucket + 2];
|
||||
|
||||
/* That's all we need. */
|
||||
vdso_info.valid = true;
|
||||
}
|
||||
|
||||
static bool vdso_match_version(Elf64_Versym ver,
|
||||
const char *name, Elf64_Word hash)
|
||||
{
|
||||
/*
|
||||
* This is a helper function to check if the version indexed by
|
||||
* ver matches name (which hashes to hash).
|
||||
*
|
||||
* The version definition table is a mess, and I don't know how
|
||||
* to do this in better than linear time without allocating memory
|
||||
* to build an index. I also don't know why the table has
|
||||
* variable size entries in the first place.
|
||||
*
|
||||
* For added fun, I can't find a comprehensible specification of how
|
||||
* to parse all the weird flags in the table.
|
||||
*
|
||||
* So I just parse the whole table every time.
|
||||
*/
|
||||
|
||||
/* First step: find the version definition */
|
||||
ver &= 0x7fff; /* Apparently bit 15 means "hidden" */
|
||||
Elf64_Verdef *def = vdso_info.verdef;
|
||||
while(true) {
|
||||
if ((def->vd_flags & VER_FLG_BASE) == 0
|
||||
&& (def->vd_ndx & 0x7fff) == ver)
|
||||
break;
|
||||
|
||||
if (def->vd_next == 0)
|
||||
return false; /* No definition. */
|
||||
|
||||
def = (Elf64_Verdef *)((char *)def + def->vd_next);
|
||||
}
|
||||
|
||||
/* Now figure out whether it matches. */
|
||||
Elf64_Verdaux *aux = (Elf64_Verdaux*)((char *)def + def->vd_aux);
|
||||
return def->vd_hash == hash
|
||||
&& !strcmp(name, vdso_info.symstrings + aux->vda_name);
|
||||
}
|
||||
|
||||
void *vdso_sym(const char *version, const char *name)
|
||||
{
|
||||
unsigned long ver_hash;
|
||||
if (!vdso_info.valid)
|
||||
return 0;
|
||||
|
||||
ver_hash = elf_hash(version);
|
||||
Elf64_Word chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
|
||||
|
||||
for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
|
||||
Elf64_Sym *sym = &vdso_info.symtab[chain];
|
||||
|
||||
/* Check for a defined global or weak function w/ right name. */
|
||||
if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
|
||||
continue;
|
||||
if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
|
||||
ELF64_ST_BIND(sym->st_info) != STB_WEAK)
|
||||
continue;
|
||||
if (sym->st_shndx == SHN_UNDEF)
|
||||
continue;
|
||||
if (strcmp(name, vdso_info.symstrings + sym->st_name))
|
||||
continue;
|
||||
|
||||
/* Check symbol version. */
|
||||
if (vdso_info.versym
|
||||
&& !vdso_match_version(vdso_info.versym[chain],
|
||||
version, ver_hash))
|
||||
continue;
|
||||
|
||||
return (void *)(vdso_info.load_offset + sym->st_value);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vdso_init_from_auxv(void *auxv)
|
||||
{
|
||||
Elf64_auxv_t *elf_auxv = auxv;
|
||||
for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
|
||||
{
|
||||
if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
|
||||
vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
vdso_info.valid = false;
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* vdso_test.c: Sample code to test parse_vdso.c on x86_64
|
||||
* Copyright (c) 2011 Andy Lutomirski
|
||||
* Subject to the GNU General Public License, version 2
|
||||
*
|
||||
* You can amuse yourself by compiling with:
|
||||
* gcc -std=gnu99 -nostdlib
|
||||
* -Os -fno-asynchronous-unwind-tables -flto
|
||||
* vdso_test.c parse_vdso.c -o vdso_test
|
||||
* to generate a small binary with no dependencies at all.
|
||||
*/
|
||||
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
|
||||
extern void *vdso_sym(const char *version, const char *name);
|
||||
extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
|
||||
extern void vdso_init_from_auxv(void *auxv);
|
||||
|
||||
/*
 * We need one libc function, since this program is built with -nostdlib.
 *
 * Compares two NUL-terminated strings byte by byte as unsigned chars.
 * Returns 0 if they are equal, a negative value if a sorts before b,
 * and a positive value otherwise.
 */
int strcmp(const char *a, const char *b)
{
	/* Advance while both strings agree and have not ended. */
	while (*a && *a == *b) {
		a++;
		b++;
	}

	return (int)(unsigned char)*a - (int)(unsigned char)*b;
}
|
||||
|
||||
/*
 * Raw system call wrappers.  This is x86_64-specific.
 *
 * linux_write: issue the write system call directly.
 *
 * fd:   file descriptor to write to.
 * data: buffer to write.
 * len:  number of bytes to write.
 *
 * Returns whatever the kernel leaves in rax.
 */
static inline long linux_write(int fd, const void *data, size_t len)
{
	long result;

	__asm__ __volatile__ ("syscall"
			      : "=a" (result)
			      : "a" (__NR_write),
				"D" (fd), "S" (data), "d" (len)
			      : "cc", "memory", "rcx",
				"r8", "r9", "r10", "r11");

	return result;
}
|
||||
|
||||
/*
 * Issue the exit system call directly with the given status code.
 * x86_64-specific.
 */
static inline void linux_exit(int code)
{
	__asm__ __volatile__ ("syscall"
			      :
			      : "a" (__NR_exit), "D" (code));
}
|
||||
|
||||
/*
 * Write n in decimal, right-aligned so that its last digit lands on
 * *lastdig; more significant digits go to lower addresses.
 *
 * Nothing is written when n is 0, so the caller's buffer must already
 * contain whatever should be shown in that case.  No terminator is
 * added and no bounds are checked.
 */
void to_base10(char *lastdig, uint64_t n)
{
	char *out;

	for (out = lastdig; n != 0; n /= 10, out--)
		*out = (n % 10) + '0';
}
|
||||
|
||||
/*
 * C entry point, reached from _start with the initial stack pointer.
 *
 * stack: points at argc, followed by argv, envp and auxv as laid out
 *        by the kernel at process start.
 *
 * Locates auxv, initialises the vDSO parser, calls the vDSO's
 * gettimeofday and prints the result.  Exits with 1 if the symbol is
 * missing, with gettimeofday's return value if the call fails, and
 * with 0 otherwise.
 */
__attribute__((externally_visible)) void c_main(void **stack)
{
	/* Parse the stack: skip argc, the argv pointers and argv's NULL. */
	long argc = (long)*stack;
	stack += argc + 2;

	/* Now we're pointing at the environment.  Skip it. */
	while (*stack)
		stack++;
	stack++;

	/* Now we're pointing at auxv.  Initialize the vDSO parser. */
	vdso_init_from_auxv((void *)stack);

	/* Find gettimeofday. */
	typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
	gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");

	if (!gtod)
		linux_exit(1);

	struct timeval tv;
	long ret = gtod(&tv, 0);

	if (ret == 0) {
		/*
		 * Layout: label, a blank 20-column field for the seconds
		 * (filled right to left), '.', six microsecond digits and
		 * a newline.  The field must be part of the buffer:
		 * without it the digit writes below land past its end.
		 */
		char buf[] = "The time is "
			     "          " "          "
			     ".000000\n";

		/*
		 * Offsets are taken from the end of the buffer so they stay
		 * in bounds: the tail is ".000000\n" plus the NUL (9 bytes).
		 */
		to_base10(buf + sizeof(buf) - 10, tv.tv_sec);
		to_base10(buf + sizeof(buf) - 3, tv.tv_usec);
		linux_write(1, buf, sizeof(buf) - 1);
	} else {
		linux_exit(ret);
	}

	linux_exit(0);
}
|
||||
|
||||
/*
 * The real entry point.  It copies the initial stack pointer into the
 * first argument register and jumps to c_main.
 */
__asm__ (
	".text\n"
	".global _start\n"
	".type _start,@function\n"
	"_start:\n\t"
	"mov %rsp,%rdi\n\t"
	"jmp c_main"
);
|
|
@ -180,6 +180,19 @@ KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time.
|
|||
If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4
|
||||
cpus max.
|
||||
|
||||
On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
|
||||
threads in one or more virtual CPU cores. (This is because the
|
||||
hardware requires all the hardware threads in a CPU core to be in the
|
||||
same partition.) The KVM_CAP_PPC_SMT capability indicates the number
|
||||
of vcpus per virtual core (vcore). The vcore id is obtained by
|
||||
dividing the vcpu id by the number of vcpus per vcore. The vcpus in a
|
||||
given vcore will always be in the same physical core as each other
|
||||
(though that might be a different physical core from time to time).
|
||||
Userspace can control the threading (SMT) mode of the guest by its
|
||||
allocation of vcpu ids. For example, if userspace wants
|
||||
single-threaded guest vcpus, it should make all vcpu ids be a multiple
|
||||
of the number of vcpus per vcore.
|
||||
|
||||
4.8 KVM_GET_DIRTY_LOG (vm ioctl)
|
||||
|
||||
Capability: basic
|
||||
|
@ -1143,15 +1156,10 @@ Assigns an IRQ to a passed-through device.
|
|||
|
||||
struct kvm_assigned_irq {
|
||||
__u32 assigned_dev_id;
|
||||
__u32 host_irq;
|
||||
__u32 host_irq; /* ignored (legacy field) */
|
||||
__u32 guest_irq;
|
||||
__u32 flags;
|
||||
union {
|
||||
struct {
|
||||
__u32 addr_lo;
|
||||
__u32 addr_hi;
|
||||
__u32 data;
|
||||
} guest_msi;
|
||||
__u32 reserved[12];
|
||||
};
|
||||
};
|
||||
|
@ -1239,8 +1247,10 @@ Type: vm ioctl
|
|||
Parameters: struct kvm_assigned_msix_nr (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Set the number of MSI-X interrupts for an assigned device. This service can
|
||||
only be called once in the lifetime of an assigned device.
|
||||
Set the number of MSI-X interrupts for an assigned device. The number is
|
||||
reset again by terminating the MSI-X assignment of the device via
|
||||
KVM_DEASSIGN_DEV_IRQ. Calling this service more than once at any earlier
|
||||
point will fail.
|
||||
|
||||
struct kvm_assigned_msix_nr {
|
||||
__u32 assigned_dev_id;
|
||||
|
@ -1291,6 +1301,135 @@ Returns the tsc frequency of the guest. The unit of the return value is
|
|||
KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
|
||||
error.
|
||||
|
||||
4.56 KVM_GET_LAPIC
|
||||
|
||||
Capability: KVM_CAP_IRQCHIP
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_lapic_state (out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
#define KVM_APIC_REG_SIZE 0x400
|
||||
struct kvm_lapic_state {
|
||||
char regs[KVM_APIC_REG_SIZE];
|
||||
};
|
||||
|
||||
Reads the Local APIC registers and copies them into the input argument. The
|
||||
data format and layout are the same as documented in the architecture manual.
|
||||
|
||||
4.57 KVM_SET_LAPIC
|
||||
|
||||
Capability: KVM_CAP_IRQCHIP
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_lapic_state (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
#define KVM_APIC_REG_SIZE 0x400
|
||||
struct kvm_lapic_state {
|
||||
char regs[KVM_APIC_REG_SIZE];
|
||||
};
|
||||
|
||||
Copies the input argument into the Local APIC registers. The data format
|
||||
and layout are the same as documented in the architecture manual.
|
||||
|
||||
4.58 KVM_IOEVENTFD
|
||||
|
||||
Capability: KVM_CAP_IOEVENTFD
|
||||
Architectures: all
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_ioeventfd (in)
|
||||
Returns: 0 on success, !0 on error
|
||||
|
||||
This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address
|
||||
within the guest. A guest write in the registered address will signal the
|
||||
provided event instead of triggering an exit.
|
||||
|
||||
struct kvm_ioeventfd {
|
||||
__u64 datamatch;
|
||||
__u64 addr; /* legal pio/mmio address */
|
||||
__u32 len; /* 1, 2, 4, or 8 bytes */
|
||||
__s32 fd;
|
||||
__u32 flags;
|
||||
__u8 pad[36];
|
||||
};
|
||||
|
||||
The following flags are defined:
|
||||
|
||||
#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
|
||||
#define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio)
|
||||
#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
|
||||
|
||||
If datamatch flag is set, the event will be signaled only if the written value
|
||||
to the registered address is equal to datamatch in struct kvm_ioeventfd.
|
||||
|
||||
4.62 KVM_CREATE_SPAPR_TCE
|
||||
|
||||
Capability: KVM_CAP_SPAPR_TCE
|
||||
Architectures: powerpc
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_create_spapr_tce (in)
|
||||
Returns: file descriptor for manipulating the created TCE table
|
||||
|
||||
This creates a virtual TCE (translation control entry) table, which
|
||||
is an IOMMU for PAPR-style virtual I/O. It is used to translate
|
||||
logical addresses used in virtual I/O into guest physical addresses,
|
||||
and provides a scatter/gather capability for PAPR virtual I/O.
|
||||
|
||||
/* for KVM_CAP_SPAPR_TCE */
|
||||
struct kvm_create_spapr_tce {
|
||||
__u64 liobn;
|
||||
__u32 window_size;
|
||||
};
|
||||
|
||||
The liobn field gives the logical IO bus number for which to create a
|
||||
TCE table. The window_size field specifies the size of the DMA window
|
||||
which this TCE table will translate - the table will contain one 64
|
||||
bit TCE entry for every 4kiB of the DMA window.
|
||||
|
||||
When the guest issues an H_PUT_TCE hcall on a liobn for which a TCE
|
||||
table has been created using this ioctl(), the kernel will handle it
|
||||
in real mode, updating the TCE table. H_PUT_TCE calls for other
|
||||
liobns will cause a vm exit and must be handled by userspace.
|
||||
|
||||
The return value is a file descriptor which can be passed to mmap(2)
|
||||
to map the created TCE table into userspace. This lets userspace read
|
||||
the entries written by kernel-handled H_PUT_TCE calls, and also lets
|
||||
userspace update the TCE table directly which is useful in some
|
||||
circumstances.
|
||||
|
||||
4.63 KVM_ALLOCATE_RMA
|
||||
|
||||
Capability: KVM_CAP_PPC_RMA
|
||||
Architectures: powerpc
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_allocate_rma (out)
|
||||
Returns: file descriptor for mapping the allocated RMA
|
||||
|
||||
This allocates a Real Mode Area (RMA) from the pool allocated at boot
|
||||
time by the kernel. An RMA is a physically-contiguous, aligned region
|
||||
of memory used on older POWER processors to provide the memory which
|
||||
will be accessed by real-mode (MMU off) accesses in a KVM guest.
|
||||
POWER processors support a set of sizes for the RMA that usually
|
||||
includes 64MB, 128MB, 256MB and some larger powers of two.
|
||||
|
||||
/* for KVM_ALLOCATE_RMA */
|
||||
struct kvm_allocate_rma {
|
||||
__u64 rma_size;
|
||||
};
|
||||
|
||||
The return value is a file descriptor which can be passed to mmap(2)
|
||||
to map the allocated RMA into userspace. The mapped area can then be
|
||||
passed to the KVM_SET_USER_MEMORY_REGION ioctl to establish it as the
|
||||
RMA for a virtual machine. The size of the RMA in bytes (which is
|
||||
fixed at host kernel boot time) is returned in the rma_size field of
|
||||
the argument structure.
|
||||
|
||||
The KVM_CAP_PPC_RMA capability is 1 or 2 if the KVM_ALLOCATE_RMA ioctl
|
||||
is supported; 2 if the processor requires all virtual machines to have
|
||||
an RMA, or 1 if the processor can use an RMA but doesn't require it,
|
||||
because it supports the Virtual RMA (VRMA) facility.
|
||||
|
||||
5. The kvm_run structure
|
||||
|
||||
Application code obtains a pointer to the kvm_run structure by
|
||||
|
@ -1473,6 +1612,23 @@ Userspace can now handle the hypercall and when it's done modify the gprs as
|
|||
necessary. Upon guest entry all guest GPRs will then be replaced by the values
|
||||
in this struct.
|
||||
|
||||
/* KVM_EXIT_PAPR_HCALL */
|
||||
struct {
|
||||
__u64 nr;
|
||||
__u64 ret;
|
||||
__u64 args[9];
|
||||
} papr_hcall;
|
||||
|
||||
This is used on 64-bit PowerPC when emulating a pSeries partition,
|
||||
e.g. with the 'pseries' machine type in qemu. It occurs when the
|
||||
guest does a hypercall using the 'sc 1' instruction. The 'nr' field
|
||||
contains the hypercall number (from the guest R3), and 'args' contains
|
||||
the arguments (from the guest R4 - R12). Userspace should put the
|
||||
return code in 'ret' and any extra returned values in args[].
|
||||
The possible hypercalls are defined in the Power Architecture Platform
|
||||
Requirements (PAPR) document available from www.power.org (free
|
||||
developer registration required to access it).
|
||||
|
||||
/* Fix the size of the union. */
|
||||
char padding[256];
|
||||
};
|
||||
|
|
|
@ -165,6 +165,10 @@ Shadow pages contain the following information:
|
|||
Contains the value of efer.nxe for which the page is valid.
|
||||
role.cr0_wp:
|
||||
Contains the value of cr0.wp for which the page is valid.
|
||||
role.smep_andnot_wp:
|
||||
Contains the value of cr4.smep && !cr0.wp for which the page is valid
|
||||
(pages for which this is true are different from other pages; see the
|
||||
treatment of cr0.wp=0 below).
|
||||
gfn:
|
||||
Either the guest page table containing the translations shadowed by this
|
||||
page, or the base page frame for linear translations. See role.direct.
|
||||
|
@ -317,6 +321,20 @@ on fault type:
|
|||
|
||||
(user write faults generate a #PF)
|
||||
|
||||
In the first case there is an additional complication if CR4.SMEP is
|
||||
enabled: since we've turned the page into a kernel page, the kernel may now
|
||||
execute it. We handle this by also setting spte.nx. If we get a user
|
||||
fetch or read fault, we'll change spte.u=1 and spte.nx=gpte.nx back.
|
||||
|
||||
To prevent an spte that was converted into a kernel page with cr0.wp=0
|
||||
from being written by the kernel after cr0.wp has changed to 1, we make
|
||||
the value of cr0.wp part of the page role. This means that an spte created
|
||||
with one value of cr0.wp cannot be used when cr0.wp has a different value -
|
||||
it will simply be missed by the shadow page lookup code. A similar issue
|
||||
exists when an spte created with cr0.wp=0 and cr4.smep=0 is used after
|
||||
changing cr4.smep to 1. To avoid this, the value of !cr0.wp && cr4.smep
|
||||
is also made a part of the page role.
|
||||
|
||||
Large pages
|
||||
===========
|
||||
|
||||
|
|
|
@ -185,3 +185,37 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
|
|||
|
||||
Currently type 2 APF will be always delivered on the same vcpu as
|
||||
type 1 was, but guest should not rely on that.
|
||||
|
||||
MSR_KVM_STEAL_TIME: 0x4b564d03
|
||||
|
||||
data: 64-byte aligned physical address of a memory area which must be
|
||||
in guest RAM, plus an enable bit in bit 0. This memory is expected to
|
||||
hold a copy of the following structure:
|
||||
|
||||
struct kvm_steal_time {
|
||||
__u64 steal;
|
||||
__u32 version;
|
||||
__u32 flags;
|
||||
__u32 pad[12];
|
||||
}
|
||||
|
||||
whose data will be filled in by the hypervisor periodically. Only one
|
||||
write, or registration, is needed for each VCPU. The interval between
|
||||
updates of this structure is arbitrary and implementation-dependent.
|
||||
The hypervisor may update this structure at any time it sees fit until
|
||||
anything with bit0 == 0 is written to it. The guest is required to make sure
|
||||
this structure is initialized to zero.
|
||||
|
||||
Fields have the following meanings:
|
||||
|
||||
version: a sequence counter. In other words, guest has to check
|
||||
this field before and after grabbing time information and make
|
||||
sure they are both equal and even. An odd version indicates an
|
||||
in-progress update.
|
||||
|
||||
flags: At this point, always zero. May be used to indicate
|
||||
changes in this structure in the future.
|
||||
|
||||
steal: the amount of time in which this vCPU did not run, in
|
||||
nanoseconds. Time during which the vcpu is idle will not be
|
||||
reported as steal time.
|
||||
|
|
|
@ -0,0 +1,251 @@
|
|||
Nested VMX
|
||||
==========
|
||||
|
||||
Overview
|
||||
---------
|
||||
|
||||
On Intel processors, KVM uses Intel's VMX (Virtual-Machine eXtensions)
|
||||
to easily and efficiently run guest operating systems. Normally, these guests
|
||||
*cannot* themselves be hypervisors running their own guests, because in VMX,
|
||||
guests cannot use VMX instructions.
|
||||
|
||||
The "Nested VMX" feature adds this missing capability - of running guest
|
||||
hypervisors (which use VMX) with their own nested guests. It does so by
|
||||
allowing a guest to use VMX instructions, and correctly and efficiently
|
||||
emulating them using the single level of VMX available in the hardware.
|
||||
|
||||
We describe in much greater detail the theory behind the nested VMX feature,
|
||||
its implementation and its performance characteristics, in the OSDI 2010 paper
|
||||
"The Turtles Project: Design and Implementation of Nested Virtualization",
|
||||
available at:
|
||||
|
||||
http://www.usenix.org/events/osdi10/tech/full_papers/Ben-Yehuda.pdf
|
||||
|
||||
|
||||
Terminology
|
||||
-----------
|
||||
|
||||
Single-level virtualization has two levels - the host (KVM) and the guests.
|
||||
In nested virtualization, we have three levels: The host (KVM), which we call
|
||||
L0, the guest hypervisor, which we call L1, and its nested guest, which we
|
||||
call L2.
|
||||
|
||||
|
||||
Known limitations
|
||||
-----------------
|
||||
|
||||
The current code supports running Linux guests under KVM guests.
|
||||
Only 64-bit guest hypervisors are supported.
|
||||
|
||||
Additional patches for running Windows under guest KVM, and Linux under
|
||||
guest VMware server, and support for nested EPT, are currently running in
|
||||
the lab, and will be sent as follow-on patchsets.
|
||||
|
||||
|
||||
Running nested VMX
|
||||
------------------
|
||||
|
||||
The nested VMX feature is disabled by default. It can be enabled by giving
|
||||
the "nested=1" option to the kvm-intel module.
|
||||
|
||||
No modifications are required to user space (qemu). However, qemu's default
|
||||
emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
|
||||
explicitly enabled, by giving qemu one of the following options:
|
||||
|
||||
-cpu host (emulated CPU has all features of the real CPU)
|
||||
|
||||
-cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
|
||||
|
||||
|
||||
ABIs
|
||||
----
|
||||
|
||||
Nested VMX aims to present a standard and (eventually) fully-functional VMX
|
||||
implementation for a guest hypervisor to use. As such, the official
|
||||
specification of the ABI that it provides is Intel's VMX specification,
|
||||
namely volume 3B of their "Intel 64 and IA-32 Architectures Software
|
||||
Developer's Manual". Not all of VMX's features are currently fully supported,
|
||||
but the goal is to eventually support them all, starting with the VMX features
|
||||
which are used in practice by popular hypervisors (KVM and others).
|
||||
|
||||
As a VMX implementation, nested VMX presents a VMCS structure to L1.
|
||||
As mandated by the spec, other than the two fields revision_id and abort,
|
||||
this structure is *opaque* to its user, who is not supposed to know or care
|
||||
about its internal structure. Rather, the structure is accessed through the
|
||||
VMREAD and VMWRITE instructions.
|
||||
Still, for debugging purposes, KVM developers might be interested to know the
|
||||
internals of this structure; this is struct vmcs12 from arch/x86/kvm/vmx.c.
|
||||
|
||||
The name "vmcs12" refers to the VMCS that L1 builds for L2. In the code we
|
||||
also have "vmcs01", the VMCS that L0 built for L1, and "vmcs02" is the VMCS
|
||||
which L0 builds to actually run L2 - how this is done is explained in the
|
||||
aforementioned paper.
|
||||
|
||||
For convenience, we repeat the content of struct vmcs12 here. If the internals
|
||||
of this structure change, this can break live migration across KVM versions.
|
||||
VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
|
||||
struct shadow_vmcs is ever changed.
|
||||
|
||||
typedef u64 natural_width;
|
||||
struct __packed vmcs12 {
|
||||
/* According to the Intel spec, a VMCS region must start with
|
||||
* these two user-visible fields */
|
||||
u32 revision_id;
|
||||
u32 abort;
|
||||
|
||||
u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
|
||||
u32 padding[7]; /* room for future expansion */
|
||||
|
||||
u64 io_bitmap_a;
|
||||
u64 io_bitmap_b;
|
||||
u64 msr_bitmap;
|
||||
u64 vm_exit_msr_store_addr;
|
||||
u64 vm_exit_msr_load_addr;
|
||||
u64 vm_entry_msr_load_addr;
|
||||
u64 tsc_offset;
|
||||
u64 virtual_apic_page_addr;
|
||||
u64 apic_access_addr;
|
||||
u64 ept_pointer;
|
||||
u64 guest_physical_address;
|
||||
u64 vmcs_link_pointer;
|
||||
u64 guest_ia32_debugctl;
|
||||
u64 guest_ia32_pat;
|
||||
u64 guest_ia32_efer;
|
||||
u64 guest_pdptr0;
|
||||
u64 guest_pdptr1;
|
||||
u64 guest_pdptr2;
|
||||
u64 guest_pdptr3;
|
||||
u64 host_ia32_pat;
|
||||
u64 host_ia32_efer;
|
||||
u64 padding64[8]; /* room for future expansion */
|
||||
natural_width cr0_guest_host_mask;
|
||||
natural_width cr4_guest_host_mask;
|
||||
natural_width cr0_read_shadow;
|
||||
natural_width cr4_read_shadow;
|
||||
natural_width cr3_target_value0;
|
||||
natural_width cr3_target_value1;
|
||||
natural_width cr3_target_value2;
|
||||
natural_width cr3_target_value3;
|
||||
natural_width exit_qualification;
|
||||
natural_width guest_linear_address;
|
||||
natural_width guest_cr0;
|
||||
natural_width guest_cr3;
|
||||
natural_width guest_cr4;
|
||||
natural_width guest_es_base;
|
||||
natural_width guest_cs_base;
|
||||
natural_width guest_ss_base;
|
||||
natural_width guest_ds_base;
|
||||
natural_width guest_fs_base;
|
||||
natural_width guest_gs_base;
|
||||
natural_width guest_ldtr_base;
|
||||
natural_width guest_tr_base;
|
||||
natural_width guest_gdtr_base;
|
||||
natural_width guest_idtr_base;
|
||||
natural_width guest_dr7;
|
||||
natural_width guest_rsp;
|
||||
natural_width guest_rip;
|
||||
natural_width guest_rflags;
|
||||
natural_width guest_pending_dbg_exceptions;
|
||||
natural_width guest_sysenter_esp;
|
||||
natural_width guest_sysenter_eip;
|
||||
natural_width host_cr0;
|
||||
natural_width host_cr3;
|
||||
natural_width host_cr4;
|
||||
natural_width host_fs_base;
|
||||
natural_width host_gs_base;
|
||||
natural_width host_tr_base;
|
||||
natural_width host_gdtr_base;
|
||||
natural_width host_idtr_base;
|
||||
natural_width host_ia32_sysenter_esp;
|
||||
natural_width host_ia32_sysenter_eip;
|
||||
natural_width host_rsp;
|
||||
natural_width host_rip;
|
||||
natural_width paddingl[8]; /* room for future expansion */
|
||||
u32 pin_based_vm_exec_control;
|
||||
u32 cpu_based_vm_exec_control;
|
||||
u32 exception_bitmap;
|
||||
u32 page_fault_error_code_mask;
|
||||
u32 page_fault_error_code_match;
|
||||
u32 cr3_target_count;
|
||||
u32 vm_exit_controls;
|
||||
u32 vm_exit_msr_store_count;
|
||||
u32 vm_exit_msr_load_count;
|
||||
u32 vm_entry_controls;
|
||||
u32 vm_entry_msr_load_count;
|
||||
u32 vm_entry_intr_info_field;
|
||||
u32 vm_entry_exception_error_code;
|
||||
u32 vm_entry_instruction_len;
|
||||
u32 tpr_threshold;
|
||||
u32 secondary_vm_exec_control;
|
||||
u32 vm_instruction_error;
|
||||
u32 vm_exit_reason;
|
||||
u32 vm_exit_intr_info;
|
||||
u32 vm_exit_intr_error_code;
|
||||
u32 idt_vectoring_info_field;
|
||||
u32 idt_vectoring_error_code;
|
||||
u32 vm_exit_instruction_len;
|
||||
u32 vmx_instruction_info;
|
||||
u32 guest_es_limit;
|
||||
u32 guest_cs_limit;
|
||||
u32 guest_ss_limit;
|
||||
u32 guest_ds_limit;
|
||||
u32 guest_fs_limit;
|
||||
u32 guest_gs_limit;
|
||||
u32 guest_ldtr_limit;
|
||||
u32 guest_tr_limit;
|
||||
u32 guest_gdtr_limit;
|
||||
u32 guest_idtr_limit;
|
||||
u32 guest_es_ar_bytes;
|
||||
u32 guest_cs_ar_bytes;
|
||||
u32 guest_ss_ar_bytes;
|
||||
u32 guest_ds_ar_bytes;
|
||||
u32 guest_fs_ar_bytes;
|
||||
u32 guest_gs_ar_bytes;
|
||||
u32 guest_ldtr_ar_bytes;
|
||||
u32 guest_tr_ar_bytes;
|
||||
u32 guest_interruptibility_info;
|
||||
u32 guest_activity_state;
|
||||
u32 guest_sysenter_cs;
|
||||
u32 host_ia32_sysenter_cs;
|
||||
u32 padding32[8]; /* room for future expansion */
|
||||
u16 virtual_processor_id;
|
||||
u16 guest_es_selector;
|
||||
u16 guest_cs_selector;
|
||||
u16 guest_ss_selector;
|
||||
u16 guest_ds_selector;
|
||||
u16 guest_fs_selector;
|
||||
u16 guest_gs_selector;
|
||||
u16 guest_ldtr_selector;
|
||||
u16 guest_tr_selector;
|
||||
u16 host_es_selector;
|
||||
u16 host_cs_selector;
|
||||
u16 host_ss_selector;
|
||||
u16 host_ds_selector;
|
||||
u16 host_fs_selector;
|
||||
u16 host_gs_selector;
|
||||
u16 host_tr_selector;
|
||||
};
|
||||
|
||||
|
||||
Authors
|
||||
-------
|
||||
|
||||
These patches were written by:
|
||||
Abel Gordon, abelg <at> il.ibm.com
|
||||
Nadav Har'El, nyh <at> il.ibm.com
|
||||
Orit Wasserman, oritw <at> il.ibm.com
|
||||
Ben-Ami Yassor, benami <at> il.ibm.com
|
||||
Muli Ben-Yehuda, muli <at> il.ibm.com
|
||||
|
||||
With contributions by:
|
||||
Anthony Liguori, aliguori <at> us.ibm.com
|
||||
Mike Day, mdday <at> us.ibm.com
|
||||
Michael Factor, factor <at> il.ibm.com
|
||||
Zvi Dubitzky, dubi <at> il.ibm.com
|
||||
|
||||
And valuable reviews by:
|
||||
Avi Kivity, avi <at> redhat.com
|
||||
Gleb Natapov, gleb <at> redhat.com
|
||||
Marcelo Tosatti, mtosatti <at> redhat.com
|
||||
Kevin Tian, kevin.tian <at> intel.com
|
||||
and others.
|
|
@ -68,9 +68,11 @@ page that contains parts of supervisor visible register state. The guest can
|
|||
map this shared page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE.
|
||||
|
||||
With this hypercall issued the guest always gets the magic page mapped at the
|
||||
desired location in effective and physical address space. For now, we always
|
||||
map the page to -4096. This way we can access it using absolute load and store
|
||||
functions. The following instruction reads the first field of the magic page:
|
||||
desired location. The first parameter indicates the effective address when the
|
||||
MMU is enabled. The second parameter indicates the address in real mode, if
|
||||
applicable to the target. For now, we always map the page to -4096. This way we
|
||||
can access it using absolute load and store functions. The following
|
||||
instruction reads the first field of the magic page:
|
||||
|
||||
ld rX, -4096(0)
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@
|
|||
#include <asm/bootparam.h>
|
||||
#include "../../../include/linux/lguest_launcher.h"
|
||||
/*L:110
|
||||
* We can ignore the 42 include files we need for this program, but I do want
|
||||
* We can ignore the 43 include files we need for this program, but I do want
|
||||
* to draw attention to the use of kernel-style types.
|
||||
*
|
||||
* As Linus said, "C is a Spartan language, and so should your naming be." I
|
||||
|
@ -65,7 +65,6 @@ typedef uint16_t u16;
|
|||
typedef uint8_t u8;
|
||||
/*:*/
|
||||
|
||||
#define PAGE_PRESENT 0x7 /* Present, RW, Execute */
|
||||
#define BRIDGE_PFX "bridge:"
|
||||
#ifndef SIOCBRADDIF
|
||||
#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
|
||||
|
@ -861,8 +860,10 @@ static void console_output(struct virtqueue *vq)
|
|||
/* writev can return a partial write, so we loop here. */
|
||||
while (!iov_empty(iov, out)) {
|
||||
int len = writev(STDOUT_FILENO, iov, out);
|
||||
if (len <= 0)
|
||||
err(1, "Write to stdout gave %i", len);
|
||||
if (len <= 0) {
|
||||
warn("Write to stdout gave %i (%d)", len, errno);
|
||||
break;
|
||||
}
|
||||
iov_consume(iov, out, len);
|
||||
}
|
||||
|
||||
|
@ -898,7 +899,7 @@ static void net_output(struct virtqueue *vq)
|
|||
* same format: what a coincidence!
|
||||
*/
|
||||
if (writev(net_info->tunfd, iov, out) < 0)
|
||||
errx(1, "Write to tun failed?");
|
||||
warnx("Write to tun failed (%d)?", errno);
|
||||
|
||||
/*
|
||||
* Done with that one; wait_for_vq_desc() will send the interrupt if
|
||||
|
@ -955,7 +956,7 @@ static void net_input(struct virtqueue *vq)
|
|||
*/
|
||||
len = readv(net_info->tunfd, iov, in);
|
||||
if (len <= 0)
|
||||
err(1, "Failed to read from tun.");
|
||||
warn("Failed to read from tun (%d).", errno);
|
||||
|
||||
/*
|
||||
* Mark that packet buffer as used, but don't interrupt here. We want
|
||||
|
@ -1093,9 +1094,10 @@ static void update_device_status(struct device *dev)
|
|||
warnx("Device %s configuration FAILED", dev->name);
|
||||
if (dev->running)
|
||||
reset_device(dev);
|
||||
} else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
|
||||
if (!dev->running)
|
||||
start_device(dev);
|
||||
} else {
|
||||
if (dev->running)
|
||||
err(1, "Device %s features finalized twice", dev->name);
|
||||
start_device(dev);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1120,25 +1122,11 @@ static void handle_output(unsigned long addr)
|
|||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Devices *can* be used before status is set to DRIVER_OK.
|
||||
* The original plan was that they would never do this: they
|
||||
* would always finish setting up their status bits before
|
||||
* actually touching the virtqueues. In practice, we allowed
|
||||
* them to, and they do (eg. the disk probes for partition
|
||||
* tables as part of initialization).
|
||||
*
|
||||
* If we see this, we start the device: once it's running, we
|
||||
* expect the device to catch all the notifications.
|
||||
*/
|
||||
/* Devices should not be used before features are finalized. */
|
||||
for (vq = i->vq; vq; vq = vq->next) {
|
||||
if (addr != vq->config.pfn*getpagesize())
|
||||
continue;
|
||||
if (i->running)
|
||||
errx(1, "Notification on running %s", i->name);
|
||||
/* This just calls create_thread() for each virtqueue */
|
||||
start_device(i);
|
||||
return;
|
||||
errx(1, "Notification on %s before setup!", i->name);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1370,7 +1358,7 @@ static void setup_console(void)
|
|||
* --sharenet=<name> option which opens or creates a named pipe. This can be
|
||||
* used to send packets to another guest in a 1:1 manner.
|
||||
*
|
||||
* More sopisticated is to use one of the tools developed for project like UML
|
||||
* More sophisticated is to use one of the tools developed for project like UML
|
||||
* to do networking.
|
||||
*
|
||||
* Faster is to do virtio bonding in kernel. Doing this 1:1 would be
|
||||
|
@ -1380,7 +1368,7 @@ static void setup_console(void)
|
|||
* multiple inter-guest channels behind one interface, although it would
|
||||
* require some manner of hotplugging new virtio channels.
|
||||
*
|
||||
* Finally, we could implement a virtio network switch in the kernel.
|
||||
* Finally, we could use a virtio network switch in the kernel, ie. vhost.
|
||||
:*/
|
||||
|
||||
static u32 str2ip(const char *ipaddr)
|
||||
|
@ -2017,10 +2005,7 @@ int main(int argc, char *argv[])
|
|||
/* Tell the entry path not to try to reload segment registers. */
|
||||
boot->hdr.loadflags |= KEEP_SEGMENTS;
|
||||
|
||||
/*
|
||||
* We tell the kernel to initialize the Guest: this returns the open
|
||||
* /dev/lguest file descriptor.
|
||||
*/
|
||||
/* We tell the kernel to initialize the Guest. */
|
||||
tell_kernel(start);
|
||||
|
||||
/* Ensure that we terminate if a device-servicing child dies. */
|
||||
|
|
|
@ -674,7 +674,7 @@ Protocol: 2.10+
|
|||
|
||||
Field name: init_size
|
||||
Type: read
|
||||
Offset/size: 0x25c/4
|
||||
Offset/size: 0x260/4
|
||||
|
||||
This field indicates the amount of linear contiguous memory starting
|
||||
at the kernel runtime start address that the kernel needs before it
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
This file documents some of the kernel entries in
|
||||
arch/x86/kernel/entry_64.S. A lot of this explanation is adapted from
|
||||
an email from Ingo Molnar:
|
||||
|
||||
http://lkml.kernel.org/r/<20110529191055.GC9835%40elte.hu>
|
||||
|
||||
The x86 architecture has quite a few different ways to jump into
|
||||
kernel code. Most of these entry points are registered in
|
||||
arch/x86/kernel/traps.c and implemented in arch/x86/kernel/entry_64.S
|
||||
and arch/x86/ia32/ia32entry.S.
|
||||
|
||||
The IDT vector assignments are listed in arch/x86/include/asm/irq_vectors.h.
|
||||
|
||||
Some of these entries are:
|
||||
|
||||
- system_call: syscall instruction from 64-bit code.
|
||||
|
||||
- ia32_syscall: int 0x80 from 32-bit or 64-bit code; compat syscall
|
||||
either way.
|
||||
|
||||
- ia32_syscall, ia32_sysenter: syscall and sysenter from 32-bit
|
||||
code
|
||||
|
||||
- interrupt: An array of entries. Every IDT vector that doesn't
|
||||
explicitly point somewhere else gets set to the corresponding
|
||||
value in interrupts. These point to a whole array of
|
||||
magically-generated functions that make their way to do_IRQ with
|
||||
the interrupt number as a parameter.
|
||||
|
||||
- emulate_vsyscall: int 0xcc, a special non-ABI entry used by
|
||||
vsyscall emulation.
|
||||
|
||||
- APIC interrupts: Various special-purpose interrupts for things
|
||||
like TLB shootdown.
|
||||
|
||||
- Architecturally-defined exceptions like divide_error.
|
||||
|
||||
There are a few complexities here. The different x86-64 entries
|
||||
have different calling conventions. The syscall and sysenter
|
||||
instructions have their own peculiar calling conventions. Some of
|
||||
the IDT entries push an error code onto the stack; others don't.
|
||||
IDT entries using the IST alternative stack mechanism need their own
|
||||
magic to get the stack frames right. (You can find some
|
||||
documentation in the AMD APM, Volume 2, Chapter 8 and the Intel SDM,
|
||||
Volume 3, Chapter 6.)
|
||||
|
||||
Dealing with the swapgs instruction is especially tricky. Swapgs
|
||||
toggles whether gs is the kernel gs or the user gs. The swapgs
|
||||
instruction is rather fragile: it must nest perfectly and only in
|
||||
single depth, it should only be used if entering from user mode to
|
||||
kernel mode and then when returning to user-space, and precisely
|
||||
so. If we mess that up even slightly, we crash.
|
||||
|
||||
So when we have a secondary entry, already in kernel mode, we *must
|
||||
not* use SWAPGS blindly - nor must we forget doing a SWAPGS when it's
|
||||
not switched/swapped yet.
|
||||
|
||||
Now, there's a secondary complication: there's a cheap way to test
|
||||
which mode the CPU is in and an expensive way.
|
||||
|
||||
The cheap way is to pick this info off the entry frame on the kernel
|
||||
stack, from the CS of the ptregs area of the kernel stack:
|
||||
|
||||
xorl %ebx,%ebx
|
||||
testl $3,CS+8(%rsp)
|
||||
je error_kernelspace
|
||||
SWAPGS
|
||||
|
||||
The expensive (paranoid) way is to read back the MSR_GS_BASE value
|
||||
(which is what SWAPGS modifies):
|
||||
|
||||
movl $1,%ebx
|
||||
movl $MSR_GS_BASE,%ecx
|
||||
rdmsr
|
||||
testl %edx,%edx
|
||||
js 1f /* negative -> in kernel */
|
||||
SWAPGS
|
||||
xorl %ebx,%ebx
|
||||
1: ret
|
||||
|
||||
and the whole paranoid non-paranoid macro complexity is about whether
|
||||
to suffer that RDMSR cost.
|
||||
|
||||
If we are at an interrupt or user-trap/gate-alike boundary then we can
|
||||
use the faster check: the stack will be a reliable indicator of
|
||||
whether SWAPGS was already done: if we see that we are a secondary
|
||||
entry interrupting kernel mode execution, then we know that the GS
|
||||
base has already been switched. If it says that we interrupted
|
||||
user-space execution then we must do the SWAPGS.
|
||||
|
||||
But if we are in an NMI/MCE/DEBUG/whatever super-atomic entry context,
|
||||
which might have triggered right after a normal entry wrote CS to the
|
||||
stack but before we executed SWAPGS, then the only safe way to check
|
||||
for GS is the slower method: the RDMSR.
|
||||
|
||||
So we try only to mark those entry methods 'paranoid' that absolutely
|
||||
need the more expensive check for the GS base - and we generate all
|
||||
'normal' entry points with the regular (faster) entry macros.
|
|
@ -67,7 +67,7 @@ Linux
|
|||
|
||||
12:已经通过CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT,
|
||||
CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES,
|
||||
CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP测试,并且同时都
|
||||
CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP测试,并且同时都
|
||||
使能。
|
||||
|
||||
13:已经都构建并且使用或者不使用 CONFIG_SMP 和 CONFIG_PREEMPT测试执行时间。
|
||||
|
|
|
@ -66,7 +66,7 @@ MKISS_DRIVER_MAGIC 0x04bf mkiss_channel drivers/net/mkiss.h
|
|||
RISCOM8_MAGIC 0x0907 riscom_port drivers/char/riscom8.h
|
||||
SPECIALIX_MAGIC 0x0907 specialix_port drivers/char/specialix_io8.h
|
||||
HDLC_MAGIC 0x239e n_hdlc drivers/char/n_hdlc.c
|
||||
APM_BIOS_MAGIC 0x4101 apm_user arch/i386/kernel/apm.c
|
||||
APM_BIOS_MAGIC 0x4101 apm_user arch/x86/kernel/apm_32.c
|
||||
CYCLADES_MAGIC 0x4359 cyclades_port include/linux/cyclades.h
|
||||
DB_MAGIC 0x4442 fc_info drivers/net/iph5526_novram.c
|
||||
DL_MAGIC 0x444d fc_info drivers/net/iph5526_novram.c
|
||||
|
|
92
MAINTAINERS
92
MAINTAINERS
|
@ -1,4 +1,5 @@
|
|||
|
||||
|
||||
List of maintainers and how to submit kernel changes
|
||||
|
||||
Please try to follow the guidelines below. This will make things
|
||||
|
@ -533,6 +534,8 @@ L: device-drivers-devel@blackfin.uclinux.org
|
|||
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
|
||||
W: http://wiki.analog.com/
|
||||
S: Supported
|
||||
F: sound/soc/codecs/adau*
|
||||
F: sound/soc/codecs/adav*
|
||||
F: sound/soc/codecs/ad1*
|
||||
F: sound/soc/codecs/ssm*
|
||||
|
||||
|
@ -594,6 +597,16 @@ S: Maintained
|
|||
F: arch/arm/lib/floppydma.S
|
||||
F: arch/arm/include/asm/floppy.h
|
||||
|
||||
ARM PMU PROFILING AND DEBUGGING
|
||||
M: Will Deacon <will.deacon@arm.com>
|
||||
S: Maintained
|
||||
F: arch/arm/kernel/perf_event*
|
||||
F: arch/arm/oprofile/common.c
|
||||
F: arch/arm/kernel/pmu.c
|
||||
F: arch/arm/include/asm/pmu.h
|
||||
F: arch/arm/kernel/hw_breakpoint.c
|
||||
F: arch/arm/include/asm/hw_breakpoint.h
|
||||
|
||||
ARM PORT
|
||||
M: Russell King <linux@arm.linux.org.uk>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
|
@ -683,7 +696,7 @@ T: git git://git.infradead.org/users/cbou/linux-cns3xxx.git
|
|||
|
||||
ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE
|
||||
M: Hartley Sweeten <hsweeten@visionengravers.com>
|
||||
M: Ryan Mallon <ryan@bluewatersys.com>
|
||||
M: Ryan Mallon <rmallon@gmail.com>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
S: Maintained
|
||||
F: arch/arm/mach-ep93xx/
|
||||
|
@ -1541,6 +1554,12 @@ L: linux-wireless@vger.kernel.org
|
|||
S: Supported
|
||||
F: drivers/staging/brcm80211/
|
||||
|
||||
BROADCOM BNX2FC 10 GIGABIT FCOE DRIVER
|
||||
M: Bhanu Prakash Gollapudi <bprakash@broadcom.com>
|
||||
L: linux-scsi@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/scsi/bnx2fc/
|
||||
|
||||
BROCADE BFA FC SCSI DRIVER
|
||||
M: Jing Huang <huangj@brocade.com>
|
||||
L: linux-scsi@vger.kernel.org
|
||||
|
@ -1570,7 +1589,7 @@ F: Documentation/sound/alsa/Bt87x.txt
|
|||
F: sound/pci/bt87x.c
|
||||
|
||||
BT8XXGPIO DRIVER
|
||||
M: Michael Buesch <mb@bu3sch.de>
|
||||
M: Michael Buesch <m@bues.ch>
|
||||
W: http://bu3sch.de/btgpio.php
|
||||
S: Maintained
|
||||
F: drivers/gpio/bt8xxgpio.c
|
||||
|
@ -1763,7 +1782,8 @@ F: include/linux/clk.h
|
|||
|
||||
CISCO FCOE HBA DRIVER
|
||||
M: Abhijeet Joglekar <abjoglek@cisco.com>
|
||||
M: Joe Eykholt <jeykholt@cisco.com>
|
||||
M: Venkata Siva Vijayendra Bhamidipati <vbhamidi@cisco.com>
|
||||
M: Brian Uchino <buchino@cisco.com>
|
||||
L: linux-scsi@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/scsi/fnic/
|
||||
|
@ -2198,7 +2218,7 @@ F: drivers/acpi/dock.c
|
|||
DOCUMENTATION
|
||||
M: Randy Dunlap <rdunlap@xenotime.net>
|
||||
L: linux-doc@vger.kernel.org
|
||||
T: quilt oss.oracle.com/~rdunlap/kernel-doc-patches/current/
|
||||
T: quilt http://userweb.kernel.org/~rdunlap/kernel-doc-patches/current/
|
||||
S: Maintained
|
||||
F: Documentation/
|
||||
|
||||
|
@ -2993,7 +3013,7 @@ F: kernel/hrtimer.c
|
|||
F: kernel/time/clockevents.c
|
||||
F: kernel/time/tick*.*
|
||||
F: kernel/time/timer_*.c
|
||||
F: include/linux/clockevents.h
|
||||
F: include/linux/clockchips.h
|
||||
F: include/linux/hrtimer.h
|
||||
|
||||
HIGH-SPEED SCC DRIVER FOR AX.25
|
||||
|
@ -3416,10 +3436,9 @@ S: Maintained
|
|||
F: drivers/net/ipg.*
|
||||
|
||||
IPATH DRIVER
|
||||
M: Ralph Campbell <infinipath@qlogic.com>
|
||||
M: Mike Marciniszyn <infinipath@qlogic.com>
|
||||
L: linux-rdma@vger.kernel.org
|
||||
T: git git://git.qlogic.com/ipath-linux-2.6
|
||||
S: Supported
|
||||
S: Maintained
|
||||
F: drivers/infiniband/hw/ipath/
|
||||
|
||||
IPMI SUBSYSTEM
|
||||
|
@ -3942,6 +3961,13 @@ L: lm-sensors@lm-sensors.org
|
|||
S: Maintained
|
||||
F: drivers/hwmon/lm73.c
|
||||
|
||||
LM78 HARDWARE MONITOR DRIVER
|
||||
M: Jean Delvare <khali@linux-fr.org>
|
||||
L: lm-sensors@lm-sensors.org
|
||||
S: Maintained
|
||||
F: Documentation/hwmon/lm78
|
||||
F: drivers/hwmon/lm78.c
|
||||
|
||||
LM83 HARDWARE MONITOR DRIVER
|
||||
M: Jean Delvare <khali@linux-fr.org>
|
||||
L: lm-sensors@lm-sensors.org
|
||||
|
@ -4088,6 +4114,12 @@ S: Maintained
|
|||
F: drivers/net/mv643xx_eth.*
|
||||
F: include/linux/mv643xx.h
|
||||
|
||||
MARVELL MWIFIEX WIRELESS DRIVER
|
||||
M: Bing Zhao <bzhao@marvell.com>
|
||||
L: linux-wireless@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/net/wireless/mwifiex/
|
||||
|
||||
MARVELL MWL8K WIRELESS DRIVER
|
||||
M: Lennert Buytenhek <buytenh@wantstofly.org>
|
||||
L: linux-wireless@vger.kernel.org
|
||||
|
@ -4280,8 +4312,8 @@ S: Maintained
|
|||
F: drivers/usb/musb/
|
||||
|
||||
MYRICOM MYRI-10G 10GbE DRIVER (MYRI10GE)
|
||||
M: Jon Mason <mason@myri.com>
|
||||
M: Andrew Gallatin <gallatin@myri.com>
|
||||
M: Brice Goglin <brice@myri.com>
|
||||
L: netdev@vger.kernel.org
|
||||
W: http://www.myri.com/scs/download-Myri10GE.html
|
||||
S: Supported
|
||||
|
@ -4575,9 +4607,8 @@ S: Maintained
|
|||
F: drivers/mmc/host/omap.c
|
||||
|
||||
OMAP HS MMC SUPPORT
|
||||
M: Madhusudhan Chikkature <madhu.cr@ti.com>
|
||||
L: linux-omap@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Orphan
|
||||
F: drivers/mmc/host/omap_hsmmc.c
|
||||
|
||||
OMAP RANDOM NUMBER GENERATOR SUPPORT
|
||||
|
@ -4671,6 +4702,14 @@ F: drivers/of
|
|||
F: include/linux/of*.h
|
||||
K: of_get_property
|
||||
|
||||
OPENRISC ARCHITECTURE
|
||||
M: Jonas Bonn <jonas@southpole.se>
|
||||
W: http://openrisc.net
|
||||
L: linux@lists.openrisc.net
|
||||
S: Maintained
|
||||
T: git git://openrisc.net/~jonas/linux
|
||||
F: arch/openrisc
|
||||
|
||||
OPL4 DRIVER
|
||||
M: Clemens Ladisch <clemens@ladisch.de>
|
||||
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
|
||||
|
@ -4983,7 +5022,7 @@ F: drivers/power/power_supply*
|
|||
|
||||
PNP SUPPORT
|
||||
M: Adam Belay <abelay@mit.edu>
|
||||
M: Bjorn Helgaas <bjorn.helgaas@hp.com>
|
||||
M: Bjorn Helgaas <bhelgaas@google.com>
|
||||
S: Maintained
|
||||
F: drivers/pnp/
|
||||
|
||||
|
@ -5143,6 +5182,12 @@ M: Robert Jarzmik <robert.jarzmik@free.fr>
|
|||
L: rtc-linux@googlegroups.com
|
||||
S: Maintained
|
||||
|
||||
QIB DRIVER
|
||||
M: Mike Marciniszyn <infinipath@qlogic.com>
|
||||
L: linux-rdma@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/infiniband/hw/qib/
|
||||
|
||||
QLOGIC QLA1280 SCSI DRIVER
|
||||
M: Michael Reed <mdr@sgi.com>
|
||||
L: linux-scsi@vger.kernel.org
|
||||
|
@ -5304,6 +5349,13 @@ L: reiserfs-devel@vger.kernel.org
|
|||
S: Supported
|
||||
F: fs/reiserfs/
|
||||
|
||||
REGISTER MAP ABSTRACTION
|
||||
M: Mark Brown <broonie@opensource.wolfsonmicro.com>
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap.git
|
||||
S: Supported
|
||||
F: drivers/base/regmap/
|
||||
F: include/linux/regmap.h
|
||||
|
||||
RFKILL
|
||||
M: Johannes Berg <johannes@sipsolutions.net>
|
||||
L: linux-wireless@vger.kernel.org
|
||||
|
@ -5866,7 +5918,7 @@ S: Maintained
|
|||
F: drivers/net/sonic.*
|
||||
|
||||
SONICS SILICON BACKPLANE DRIVER (SSB)
|
||||
M: Michael Buesch <mb@bu3sch.de>
|
||||
M: Michael Buesch <m@bues.ch>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/ssb/
|
||||
|
@ -6372,9 +6424,14 @@ F: drivers/char/toshiba.c
|
|||
F: include/linux/toshiba.h
|
||||
|
||||
TMIO MMC DRIVER
|
||||
M: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
|
||||
M: Ian Molton <ian@mnementh.co.uk>
|
||||
L: linux-mmc@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/mmc/host/tmio_mmc.*
|
||||
F: drivers/mmc/host/tmio_mmc*
|
||||
F: drivers/mmc/host/sh_mobile_sdhi.c
|
||||
F: include/linux/mmc/tmio.h
|
||||
F: include/linux/mmc/sh_mobile_sdhi.h
|
||||
|
||||
TMPFS (SHMEM FILESYSTEM)
|
||||
M: Hugh Dickins <hughd@google.com>
|
||||
|
@ -6451,7 +6508,7 @@ F: drivers/scsi/u14-34f.c
|
|||
|
||||
UBI FILE SYSTEM (UBIFS)
|
||||
M: Artem Bityutskiy <dedekind1@gmail.com>
|
||||
M: Adrian Hunter <adrian.hunter@nokia.com>
|
||||
M: Adrian Hunter <adrian.hunter@intel.com>
|
||||
L: linux-mtd@lists.infradead.org
|
||||
T: git git://git.infradead.org/ubifs-2.6.git
|
||||
W: http://www.linux-mtd.infradead.org/doc/ubifs.html
|
||||
|
@ -6866,6 +6923,7 @@ F: fs/fat/
|
|||
VIDEOBUF2 FRAMEWORK
|
||||
M: Pawel Osciak <pawel@osciak.com>
|
||||
M: Marek Szyprowski <m.szyprowski@samsung.com>
|
||||
M: Kyungmin Park <kyungmin.park@samsung.com>
|
||||
L: linux-media@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/media/video/videobuf2-*
|
||||
|
@ -7060,9 +7118,9 @@ S: Maintained
|
|||
F: drivers/input/misc/wistron_btns.c
|
||||
|
||||
WL1251 WIRELESS DRIVER
|
||||
M: Kalle Valo <kvalo@adurom.com>
|
||||
M: Luciano Coelho <coelho@ti.com>
|
||||
L: linux-wireless@vger.kernel.org
|
||||
W: http://wireless.kernel.org
|
||||
W: http://wireless.kernel.org/en/users/Drivers/wl1251
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
|
||||
S: Maintained
|
||||
F: drivers/net/wireless/wl1251/*
|
||||
|
|
3
Makefile
3
Makefile
|
@ -1,7 +1,7 @@
|
|||
VERSION = 3
|
||||
PATCHLEVEL = 0
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION = -rc6
|
||||
EXTRAVERSION =
|
||||
NAME = Sneaky Weasel
|
||||
|
||||
# *DOCUMENTATION*
|
||||
|
@ -1290,6 +1290,7 @@ help:
|
|||
@echo ' make O=dir [targets] Locate all output files in "dir", including .config'
|
||||
@echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)'
|
||||
@echo ' make C=2 [targets] Force check of all c source with $$CHECK'
|
||||
@echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
|
||||
@echo ' make W=n [targets] Enable extra gcc checks, n=1,2,3 where'
|
||||
@echo ' 1: warnings which may be relevant and do not occur too often'
|
||||
@echo ' 2: warnings which occur quite often but may still be relevant'
|
||||
|
|
|
@ -6,6 +6,7 @@ config ALPHA
|
|||
select HAVE_OPROFILE
|
||||
select HAVE_SYSCALL_WRAPPERS
|
||||
select HAVE_IRQ_WORK
|
||||
select HAVE_PCSPKR_PLATFORM
|
||||
select HAVE_PERF_EVENTS
|
||||
select HAVE_DMA_ATTRS
|
||||
select HAVE_GENERIC_HARDIRQS
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
/*
|
||||
* 8253/8254 Programmable Interval Timer
|
||||
*/
|
|
@ -27,7 +27,7 @@
|
|||
#define fd_cacheflush(addr,size) /* nothing */
|
||||
#define fd_request_irq() request_irq(FLOPPY_IRQ, floppy_interrupt,\
|
||||
IRQF_DISABLED, "floppy", NULL)
|
||||
#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL);
|
||||
#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL)
|
||||
|
||||
#ifdef CONFIG_PCI
|
||||
|
||||
|
|
|
@ -29,20 +29,6 @@
|
|||
#define DEBUGP(fmt...)
|
||||
#endif
|
||||
|
||||
void *
|
||||
module_alloc(unsigned long size)
|
||||
{
|
||||
if (size == 0)
|
||||
return NULL;
|
||||
return vmalloc(size);
|
||||
}
|
||||
|
||||
void
|
||||
module_free(struct module *mod, void *module_region)
|
||||
{
|
||||
vfree(module_region);
|
||||
}
|
||||
|
||||
/* Allocate the GOT at the end of the core sections. */
|
||||
|
||||
struct got_entry {
|
||||
|
@ -155,14 +141,6 @@ module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
apply_relocate(Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
|
||||
unsigned int relsec, struct module *me)
|
||||
{
|
||||
printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name);
|
||||
return -ENOEXEC;
|
||||
}
|
||||
|
||||
int
|
||||
apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab,
|
||||
unsigned int symindex, unsigned int relsec,
|
||||
|
@ -302,15 +280,3 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab,
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
|
||||
struct module *me)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
module_arch_cleanup(struct module *mod)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
|
|||
data.period = event->hw.last_period;
|
||||
|
||||
if (alpha_perf_event_set_period(event, hwc, idx)) {
|
||||
if (perf_event_overflow(event, 1, &data, regs)) {
|
||||
if (perf_event_overflow(event, &data, regs)) {
|
||||
/* Interrupts coming too quickly; "throttle" the
|
||||
* counter, i.e., disable it for a little while.
|
||||
*/
|
||||
|
|
|
@ -200,7 +200,6 @@ show_regs(struct pt_regs *regs)
|
|||
void
|
||||
start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
|
||||
{
|
||||
set_fs(USER_DS);
|
||||
regs->pc = pc;
|
||||
regs->ps = 8;
|
||||
wrusp(sp);
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue