introduction to kernel programming

23
Introduction to Kernel Coding Demystifying Kernel Programming

Upload: ahmed-mekkawy

Post on 29-Jan-2018

5.367 views

Category:

Technology


0 download

TRANSCRIPT

Page 1: Introduction to Kernel Programming

Introduction to Kernel Coding

Demystifying Kernel Programming

Page 2: Introduction to Kernel Programming

Outline

● Context of execution

● Memory

● I/O

Page 3: Introduction to Kernel Programming

Mechanism vs Policy

● Mechanism: Interface to the system resources

● Policy: How the resource is used

● Examples:

– Udev

– File configuration

Page 4: Introduction to Kernel Programming

Context of execution

● Possible contexts

– System Call

– Interrupt Handling

– Tasklets

– Kernel threads

Resource Handler

Resource

User process

Kernel thread

System Call Handling

Interrupt Handling

Userspace

Kernelspace

Tasklet

Page 5: Introduction to Kernel Programming

Why do we care?

● Blocking:

– Mutual exclusion / Reentrancy

– Resource Allocation

– Mixed context code

● System responsiveness

● Crashes – what's at stake

Page 6: Introduction to Kernel Programming

Interface

● General Pattern

– Central Data Structure

– Register entry points

– Entry point definition

● Know your subsystem

Resource Handler

interface { meth1, meth2, ... }

Register

deregister

meth1 (DS)

meth2 (DS)

Container

SUBSYSTEM

consumer

Op

Page 7: Introduction to Kernel Programming

Example – Fileops

DRIVER/FS MODULE

fileops { myopen, myread, myclose }

Register

deregister

myopen (FILE)

myread

myclose

M,M:FOPS

open(fd)

read

write

VFS

USER

KERNEL

Page 8: Introduction to Kernel Programming

Registration

● For certain type, e.g. filesystem

● For specific objects, e.g. file ops

– Detection by the driver – legacy

– Detection by a bus driver

Page 9: Introduction to Kernel Programming

static int ext3_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt){ return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);}

static struct file_system_typeext3_fs_type = { .owner = THIS_MODULE, .name = "ext3", .get_sb = ext3_get_sb, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV,};

static int __init init_ext3_fs(void){ ... err = register_filesystem (&ext3_fs_type);

... return 0;}

struct vfsmount * vfs_kern_mount( struct file_system_type *type,

int flags, const char *name, void *data){ struct vfsmount *mnt; int error; mnt = alloc_vfsmnt(name);

... error = type->get_sb(type, flags, name, data, mnt);

... mnt->mnt_mountpoint = mnt->mnt_root;

... return mnt;}

static struct file_system_type **find_filesystem (const char *name, unsigned len){ struct file_system_type **p; for (p=&file_systems; *p; p=&(*p)->next) if (strlen((*p)->name) == len && strncmp((*p)->name, name, len) == 0) break; return p;}

struct vfsmount * do_kern_mount( const char *fstype,

int flags, const char *name, void *data){ struct file_system_type *type =

get_fs_type(fstype); struct vfsmount *mnt;

... mnt = vfs_kern_mount(type, flags, name, data);

... return mnt;}

int register_filesystem(struct file_system_type * fs){ int res = 0; struct file_system_type ** p;

... INIT_LIST_HEAD(&fs->fs_supers); write_lock(&file_systems_lock); p = find_filesystem(fs->name, strlen(fs->name)); if (*p) res = -EBUSY; else *p = fs; write_unlock(&file_systems_lock); return res;}

struct file_system_type *get_fs_type(const char *name){ struct file_system_type *fs; unsigned len = ... strlen(name); read_lock(&file_systems_lock); fs = *(find_filesystem(name, len)); read_unlock(&file_systems_lock); if (!fs && (request_module("%.*s", len, name) == 0)) { read_lock(&file_systems_lock); fs = *(find_filesystem(name, len)); if (fs && !try_module_get(fs->owner)) fs = NULL; read_unlock(&file_systems_lock); } return fs;}

VFS

EXT3

Page 10: Introduction to Kernel Programming

Device Model

(Bovet et al)

SUBSYSTEM

kset

kobject

attribute1, attribute2, ...

Resource Handler

PCI

pci_register_driver

probe

driver_if{...

probe}

Scan actions

register_device

Page 11: Introduction to Kernel Programming

Interrupts

● Registering for interrupts

● Interrupt Handling – fast and alert

– Critical regions: Spinlocks and SMP systems

– Memory allocation

– System is unresponsive, interrupts masked

● Tasklets – pretty fast, pretty alert

● Workqueues – sleep all you want

Page 12: Introduction to Kernel Programming

Interrupt Handling

WORKQ handler

ISR

Initialization

Tasklet

request_irq

Device

Interrupt

KERNEL PROPER

schedule_work

tasklet_schedule

DRIVER

Page 13: Introduction to Kernel Programming

static irqreturn_t ipw_isr(int irq, void *data){ struct ipw_priv *priv = data; u32 inta, inta_mask;

... spin_lock(&priv->irq_lock);

... inta_mask = ipw_read32(priv, IPW_INTA_MASK_R);

... if (!(inta & (IPW_INTA_MASK_ALL & inta_mask))) {

... } __ipw_disable_interrupts(priv); inta &= (IPW_INTA_MASK_ALL & inta_mask); ipw_write32(priv, IPW_INTA_RW, inta); priv->isr_inta = inta; tasklet_schedule(&priv->irq_tasklet); spin_unlock(&priv->irq_lock); return IRQ_HANDLED;}

static void ipw_bg_link_down(struct work_struct *work){ struct ipw_priv *priv = container_of(work, struct ipw_priv, link_down); mutex_lock(&priv->mutex); ipw_link_down(priv); mutex_unlock(&priv->mutex);}

static void ipw_irq_tasklet(struct ipw_priv *priv){ u32 inta, inta_mask, handled = 0; unsigned long flags; spin_lock_irqsave(&priv->irq_lock, flags); inta = ipw_read32(priv, IPW_INTA_RW); inta_mask = ipw_read32(priv, IPW_INTA_MASK_R); inta &= (IPW_INTA_MASK_ALL & inta_mask); spin_unlock_irqrestore(&priv->irq_lock, flags); spin_lock_irqsave(&priv->lock, flags); ... if (inta & IPW_INTA_BIT_RF_KILL_DONE) { ... cancel_delayed_work(&priv->request_scan); ... schedule_work(&priv->link_down); queue_delayed_work(priv->workqueue, &priv->rf_kill, 2 * HZ); handled |= IPW_INTA_BIT_RF_KILL_DONE; } ... spin_unlock_irqrestore(&priv->lock, flags); /* enable all interrupts */ ipw_enable_interrupts(priv);}

static int __devinitipw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent){ ... struct ipw_priv *priv; ... err = ipw_setup_deferred_work(priv); ... err = request_irq(pdev->irq, ipw_isr, IRQF_SHARED, DRV_NAME, priv); ...}

static int __devinitipw_setup_deferred_work(struct ipw_priv *priv){ priv->workqueue = create_workqueue(DRV_NAME); ... INIT_WORK(&priv->link_down, ipw_bg_link_down); ... tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long)) ipw_irq_tasklet, (unsigned long)priv); ...}

TASKLET

ISR

WORKQ

PROBE

Page 14: Introduction to Kernel Programming

What Address Space?!!!

● Flat space

– Access to pointers

– Symbols

● Across the boundary

– copy_to/copy_from

Page 15: Introduction to Kernel Programming

asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags){ struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; struct socket *sock; struct sockaddr_storage address; struct iovec *iov = iovstack; struct msghdr msg_sys; int err, iov_size, fput_needed;

... if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) return -EFAULT;

... sock = sockfd_lookup_light(fd, &err, &fput_needed);

... iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); ... iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);

... err = verify_iovec(&msg_sys, iov, (struct sockaddr *)&address, VERIFY_READ);

... err = sock_sendmsg(sock, &msg_sys, total_len);

... return err;}

static struct socket *sock_from_file(struct file *file, int *err){ if (file->f_op == &socket_file_ops) return file->private_data; ...}

static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed){

struct file *file;struct socket *sock;

file = fget_light(fd, fput_needed);if (file) {

sock = sock_from_file(file, err);if (sock) return sock;fput_light(file, *fput_needed);

}return NULL;

}

#define files_fdtable(files) (rcu_dereference((files)->fdt))static inline void free_fdtable(struct fdtable *fdt){

call_rcu(&fdt->rcu, free_fdtable_rcu);}struct file *fget_light(unsigned int fd, int *fput_needed){ struct file *file; struct files_struct *files = current->files; *fput_needed = 0;

... rcu_read_lock(); file = fcheck_files(files, fd);

... rcu_read_unlock();

... return file;}static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd){ struct file * file = NULL; struct fdtable *fdt = files_fdtable(files);

... file = rcu_dereference(fdt->fd[fd]); return file;}

SOCKETS

FS

struct fdtable {...struct file ** fd; struct rcu_head rcu;...

};int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr){ if (copy_from_user(kaddr, uaddr, ulen)) return -EFAULT; ...}

Page 16: Introduction to Kernel Programming

Allocation and flags

● Page Frame

● Memory allocation

– Atomicity: GFP_ATOMIC from reserved page frames – no sleep

– Contiguity

– Region: GFP_HIGHMEM, GFP_DMA, GFP_KERNEL

● Slab allocator

Page 17: Introduction to Kernel Programming

Manipulating User memory

● Remapping page frames

● Handling page faults

– Define vm_operations with a page fault handler

– Mark page frames to fault (e.g. fork in copy on write)

Page 18: Introduction to Kernel Programming

static intfb_mmap(struct file *file, struct vm_area_struct * vma){ int fbidx = iminor(file->f_path.dentry->d_inode); struct fb_info *info = registered_fb[fbidx]; unsigned long off; unsigned long start; u32 len;

... off = vma->vm_pgoff << PAGE_SHIFT; ... lock_kernel();

... /* frame buffer memory */ start = info->fix.smem_start; len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.smem_len);

... unlock_kernel(); start &= PAGE_MASK;

.... off += start; vma->vm_pgoff = off >> PAGE_SHIFT; vma->vm_flags |= VM_IO | VM_RESERVED;

... if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot)) return -EAGAIN; return 0;}intregister_framebuffer(struct fb_info *fb_info){

... registered_fb[i] = fb_info; ... return 0;}

static int __devinit nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent)

{ struct fb_info *info;

info = framebuffer_alloc(sizeof(struct nvidia_par), &pd->dev);...

nvidiafb_fix.smem_start = pci_resource_start(pd, 1);...

if (register_framebuffer(info) < 0) { printk(KERN_ERR PFX "error registering nVidia framebuffer\n");

... }

... return 0;}

NVIDIA

FRAME BUFFER

Page 19: Introduction to Kernel Programming

Manipulating VMA

static int snd_pcm_mmap_status_fault(struct vm_area_struct *area, struct vm_fault *vmf){ struct snd_pcm_substream *substream = area->vm_private_data; struct snd_pcm_runtime *runtime;

runtime = substream->runtime; vmf->page = virt_to_page(runtime->status); get_page(vmf->page); return 0;}static struct vm_operations_struct snd_pcm_vm_ops_status ={ .fault = snd_pcm_mmap_status_fault,};

static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area)

{ long size; if (!(area->vm_flags & VM_READ)) return -EINVAL;

size = area->vm_end - area->vm_start; if (size != PAGE_ALIGN(sizeof(struct snd_pcm_mmap_status))) return -EINVAL; area->vm_ops = &snd_pcm_vm_ops_status; area->vm_private_data = substream; area->vm_flags |= VM_RESERVED; return 0;}

Page 20: Introduction to Kernel Programming

I/O

● Control data:

– I/O memory remapping

● Data transfer:

– DMA

– PCI Scatter Gather

Page 21: Introduction to Kernel Programming

static intqla2x00_iospace_config(scsi_qla_host_t *ha){ resource_size_t pio;

if (pci_request_selected_regions(ha->pdev, ha->bars, QLA2XXX_DRIVER_NAME)) { goto iospace_error_exit; }

/* Use MMIO operations for all accesses. */ if (!(pci_resource_flags(ha->pdev, 1) & IORESOURCE_MEM)) { goto iospace_error_exit; } if (pci_resource_len(ha->pdev, 1) < MIN_IOBASE_LEN) { goto iospace_error_exit; }

ha->iobase = ioremap(pci_resource_start(ha->pdev, 1), MIN_IOBASE_LEN); if (!ha->iobase) { goto iospace_error_exit; }

return (0);

iospace_error_exit: return (-ENOMEM);}

#define WRT_REG_WORD(addr, data) writew(data,addr)#define RD_REG_WORD_RELAXED(addr) readw_relaxed(addr)#define ISP_REQ_Q_IN(ha, reg) \

(IS_QLA2100(ha) || IS_QLA2200(ha) ? \ &(reg)->u.isp2100.mailbox4 : \ &(reg)->u.isp2300.req_q_in)

intqla2x00_start_scsi(srb_t *sp){ scsi_qla_host_t *ha;

... if (scsi_sg_count(cmd)) { nseg = dma_map_sg(&ha->pdev->dev, scsi_sglist(cmd), scsi_sg_count(cmd), cmd->sc_data_direction); } else nseg = 0;

... /* Set chip new ring index. */ WRT_REG_WORD(ISP_REQ_Q_IN(ha, reg), ha->req_ring_index); RD_REG_WORD_RELAXED(ISP_REQ_Q_IN(ha, reg)); /* PCI Posting. */}

Page 22: Introduction to Kernel Programming

Know your Subsystem

● Specific structures

– Interface (entry points)

– The resource objects

● Specific registration interface

● Specific objects

Page 23: Introduction to Kernel Programming

References

● Understanding the Linux Kernel (Daniel Bovet, Marco Cesati)

● Linux Device Drivers (Alessandro Rubini)

● Linux Kernel Development (Robert Love)

● Essential Linux Device Drivers (Sreekrishnan Venkateswaran)

● Kernel Documentation

● Code

● http://www.gelato.unsw.edu.au/~dsw/public-files/kernel-docs/kernel-api/