Linux Kernel: Manually Changing Page Table Entry Flags

I'm trying to manually mark a specific region of memory in a user-space process as non-cacheable (for educational purposes, not intended for use in production code) by setting a flag on the appropriate page table entries.

I have Ubuntu 14.04 (ASLR disabled) with Linux kernel running on Intel Skylake x86_64 processor.

In my kernel module, I have the following function:

/*                                                                                                                      
 * Set memory region [start,end], excluding 'addr', of process with PID 'pid' as uncacheable.                           
 */                                                                                                                     
ssize_t set_uncachable(uint32_t pid, uint64_t start, uint64_t end, uint64_t addr)                                       
{                                                                                                                       
    struct task_struct* ts = NULL;                                                                                      
    struct vm_area_struct *curr, *first = NULL;                                                                         
    struct mm_struct* mm;                                                                                               
    pgd_t * pgd;                                                                                                        
    pte_t * pte;                                                                                                        
    uint64_t numpages, curr_addr;                                                                                       
    uint32_t level, j, i = 0;                                                                                           

    printk(KERN_INFO "set_unacheable called\n");                                                                        

    ts = pid_task(find_vpid(pid), PIDTYPE_PID); //find task from PID                                                    
    pgd = ts->mm->pgd; //page table root of the task                                                                    

    first = ts->mm->mmap;                                                                                               
    curr = first;                                                                                                       
    if(first == NULL)                                                                                                   
        return -1;                                                                                                      

    do                                                                                                                  
    {                                                                                                                   
        printk(KERN_INFO "Region %3u [0x%016llx - 0x%016llx]", i, curr->vm_start, curr->vm_end);                        

        numpages = (curr->vm_end - curr->vm_start) / PAGE_SIZE; //PAGE_SIZE is 4K for now                               
        if(curr->vm_start > curr->vm_end)                                                                               
            numpages = 0;                                                                                               

        for(j = 0; j < numpages; j++)                                                                                   
        {                                                                                                               
            curr_addr = curr->vm_start + (PAGE_SIZE*j);                                                                 
            pte = lookup_address_in_pgd(pgd, curr_addr, &level);                                                        

            if((pte != NULL) && (level == 1))                                                                           
            {                                                                                                           
                printk(KERN_INFO "PTE for 0x%016x - 0x%016x (level %u)\n", curr_addr, pte->pte, level);                 
                if(curr_addr >= start && curr_addr < end && curr_addr != addr)                                             
                {                                                                                                          
                    //setting page entry to PAT#3                                                                          
                    pte->pte |= PWT_BIT | PCD_BIT;                                                                         
                    pte->pte &= ~PAT_BIT;                                                                               
                    printk(KERN_INFO "PTE for 0x%016x - 0x%016x (level %u) -- UPDATED\n", curr_addr, pte->pte, level);  
                }                                                                                                         
            }                                                                                                             
        }                                                                                                                 

        curr = curr->vm_next;                                                                                             
        if(curr == NULL)                                                                                                  
            return -1;                                                                                                    

        i++;                                                                                                              
    } while (curr != first);                                                                                              


    return 0;                                                                                                              
}  

      

To test the above code, I run an application that allocates a specific region in memory:

/*
 * Two alternative start addresses for the test buffer; exactly one is enabled
 * at a time. Presumably BUF_ADDR used in the mmap() call is derived from this
 * value in the elided part -- verify. With x86_64 4-level paging (512 GiB per
 * top-level entry) the low address lies under PGD slot 0 and the high address
 * under PGD slot 255.
 */
//#define BUF_ADDR_START 0x0000000008400000LL    /* works */
#define BUF_ADDR_START 0x00007ffff0000000LL      /* does not work */

[...]

/*
 * Map BUF_SIZE bytes of private anonymous memory at the fixed address BUF_ADDR
 * and pre-fault it (MAP_POPULATE). An anonymous mapping has no backing file,
 * so portable code passes fd = -1 rather than 0.
 */
buffer = mmap((void *)BUF_ADDR, BUF_SIZE, PROT_READ | PROT_WRITE,
              MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_POPULATE, -1, 0);
if (buffer == MAP_FAILED)
{
    /* report the errno reason on stderr instead of a bare message on stdout */
    perror("Failed to map buffer");
    exit(EXIT_FAILURE);
}
/* touch every byte so the pages are definitely present before the module runs */
memset(buffer, 0, BUF_SIZE);
printf("Buffer at %p\n", buffer);

      

I want to mark this buffer as uncacheable using my kernel module. The code in my kernel module works for 0x8400000, but for 0x7ffff0000000 no page table entry is found (i.e. lookup_address_in_pgd returns NULL), even though the buffer is definitely mapped by the test program.

My kernel module seems to work for low addresses (code, data, and heap sections) but not for memory mapped to higher addresses (stack, shared libraries, etc.).

Does anyone have an idea why it does not work for high addresses? Suggestions on how to implement set_uncachable more elegantly are also welcome ;-)

Thanks!

+3


source to share





All Articles