arch/sh64/mm/cache.c

   1 /*
   2  * This file is subject to the terms and conditions of the GNU General Public
   3  * License.  See the file "COPYING" in the main directory of this archive
   4  * for more details.
   5  *
   6  * arch/sh64/mm/cache.c
   7  *
   8  * Original version Copyright (C) 2000, 2001  Paolo Alberelli
   9  * Second version Copyright (C) benedict.gaster@superh.com 2002
  10  * Third version Copyright Richard.Curnow@superh.com 2003
  11  * Hacks to third version Copyright (C) 2003 Paul Mundt
  12  */
  13
  14 /****************************************************************************/
  15
  16 #include <linux/config.h>
  17 #include <linux/init.h>
  18 #include <linux/mman.h>
  19 #include <linux/mm.h>
  20 #include <linux/threads.h>
  21 #include <asm/page.h>
  22 #include <asm/pgtable.h>
  23 #include <asm/processor.h>
  24 #include <asm/cache.h>
  25 #include <asm/tlb.h>
  26 #include <asm/io.h>
  27 #include <asm/uaccess.h>
  28 #include <asm/mmu_context.h>
  29 #include <asm/pgalloc.h> /* for flush_itlb_range */
  30
  31 #include <linux/proc_fs.h>
  32
   33 /* This function is in entry.S.  sh64_icache_inv_user_small_range() below
       * keeps its return value and passes it back afterwards to restore the ASID,
       * which suggests it returns the previously active ASID -- confirm against
       * entry.S. */
   34 extern unsigned long switch_and_save_asid(unsigned long new_asid);
   35
   36 /* Wired TLB entry for the D-cache: obtained from sh64_get_wired_dtlb_entry()
       * in sh64_cache_init() and handed to sh64_setup_tlb_slot() /
       * sh64_teardown_tlb_slot() by the dtlb cache slot helpers. */
   37 static unsigned long long dtlb_cache_slot;
  38
  39 /**
  40  * sh64_cache_init()
  41  *
  42  * This is pretty much just a straightforward clone of the SH
  43  * detect_cpu_and_cache_system().
  44  *
  45  * This function is responsible for setting up all of the cache
  46  * info dynamically as well as taking care of CPU probing and
  47  * setting up the relevant subtype data.
  48  *
  49  * FIXME: For the time being, we only really support the SH5-101
  50  * out of the box, and don't support dynamic probing for things
  51  * like the SH5-103 or even cut2 of the SH5-101. Implement this
  52  * later!
  53  */
  54 int __init sh64_cache_init(void)
  55 {
  56         /*
  57          * First, setup some sane values for the I-cache.
  58          */
  59         cpu_data->icache.ways           = 4;
  60         cpu_data->icache.sets           = 256;
  61         cpu_data->icache.linesz         = L1_CACHE_BYTES;
  62
  63         /*
  64          * FIXME: This can probably be cleaned up a bit as well.. for example,
  65          * do we really need the way shift _and_ the way_step_shift ?? Judging
  66          * by the existing code, I would guess no.. is there any valid reason
  67          * why we need to be tracking this around?
  68          */
  69         cpu_data->icache.way_shift      = 13;
  70         cpu_data->icache.entry_shift    = 5;
  71         cpu_data->icache.set_shift      = 4;
  72         cpu_data->icache.way_step_shift = 16;
  73         cpu_data->icache.asid_shift     = 2;
  74
  75         /*
  76          * way offset = cache size / associativity, so just don't factor in
  77          * associativity in the first place..
  78          */
  79         cpu_data->icache.way_ofs        = cpu_data->icache.sets *
  80                                           cpu_data->icache.linesz;
  81
  82         cpu_data->icache.asid_mask      = 0x3fc;
  83         cpu_data->icache.idx_mask       = 0x1fe0;
  84         cpu_data->icache.epn_mask       = 0xffffe000;
  85         cpu_data->icache.flags          = 0;
  86
  87         /*
  88          * Next, setup some sane values for the D-cache.
  89          *
  90          * On the SH5, these are pretty consistent with the I-cache settings,
  91          * so we just copy over the existing definitions.. these can be fixed
  92          * up later, especially if we add runtime CPU probing.
  93          *
  94          * Though in the meantime it saves us from having to duplicate all of
  95          * the above definitions..
  96          */
  97         cpu_data->dcache                = cpu_data->icache;
  98
  99         /*
 100          * Setup any cache-related flags here
 101          */
 102 #if defined(CONFIG_DCACHE_WRITE_THROUGH)
 103         set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
 104 #elif defined(CONFIG_DCACHE_WRITE_BACK)
 105         set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
 106 #endif
 107
 108         /*
 109          * We also need to reserve a slot for the D-cache in the DTLB, so we
 110          * do this now ..
 111          */
 112         dtlb_cache_slot                 = sh64_get_wired_dtlb_entry();
 113
 114         return 0;
 115 }
 116
  117 #ifdef CONFIG_DCACHE_DISABLED
      /* With the D-cache configured out, each D-cache purge/write-back helper
       * expands to an empty do { } while (0) statement.  The real implementations
       * further down are only compiled when CONFIG_DCACHE_DISABLED is not set.
       * NOTE(review): sh64_dcache_purge_virt_page has no real counterpart in the
       * part of the file visible here -- confirm it is still needed. */
  118 #define sh64_dcache_purge_all()                                 do { } while (0)
  119 #define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)       do { } while (0)
  120 #define sh64_dcache_purge_user_range(mm, start, end)            do { } while (0)
  121 #define sh64_dcache_purge_phy_page(paddr)                       do { } while (0)
  122 #define sh64_dcache_purge_virt_page(mm, eaddr)                  do { } while (0)
  123 #define sh64_dcache_purge_kernel_range(start, end)              do { } while (0)
  124 #define sh64_dcache_wback_current_user_range(start, end)        do { } while (0)
  125 #endif /* CONFIG_DCACHE_DISABLED */
 126
 127 /*##########################################################################*/
 128
 129 /* From here onwards, a rewrite of the implementation,
 130    by Richard.Curnow@superh.com.
 131
  132    The major changes in this compared to the old version are:
 133    1. use more selective purging through OCBP instead of using ALLOCO to purge
 134       by natural replacement.  This avoids purging out unrelated cache lines
 135       that happen to be in the same set.
 136    2. exploit the APIs copy_user_page and clear_user_page better
 137    3. be more selective about I-cache purging, in particular use invalidate_all
 138       more sparingly.
 139
 140    */
 141
 142 /*##########################################################################
 143                                SUPPORT FUNCTIONS
 144   ##########################################################################*/
 145
 146 /****************************************************************************/
  147 /* The following group of functions deals with mapping and unmapping a temporary
  148    page into the DTLB slot that has been set aside for our exclusive use. */
 149 /* In order to accomplish this, we use the generic interface for adding and
 150    removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
 151 /****************************************************************************/
 152
  153 static unsigned long slot_own_flags; /* IRQ state saved by the setup helper, restored by the teardown helper */
  154
      /* Install a temporary mapping eaddr -> paddr for 'asid' in the reserved
       * DTLB slot.  Interrupts are disabled first and stay disabled until
       * sh64_teardown_dtlb_cache_slot() runs; the saved IRQ state lives in the
       * single static slot_own_flags, so setup/teardown pairs must not nest. */
  155 static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
  156 {
  157         local_irq_save(slot_own_flags);
  158         sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
  159 }
  160
      /* Remove the temporary mapping from the reserved DTLB slot, then restore
       * the IRQ state saved by sh64_setup_dtlb_cache_slot(). */
  161 static inline void sh64_teardown_dtlb_cache_slot(void)
  162 {
  163         sh64_teardown_tlb_slot(dtlb_cache_slot);
  164         local_irq_restore(slot_own_flags);
  165 }
 166
 167 /****************************************************************************/
 168
 169 #ifndef CONFIG_ICACHE_DISABLED
 170
 171 static void __inline__ sh64_icache_inv_all(void)
 172 {
 173         unsigned long long addr, flag, data;
 174         unsigned int flags;
 175
 176         addr=ICCR0;
 177         flag=ICCR0_ICI;
 178         data=0;
 179
 180         /* Make this a critical section for safety (probably not strictly necessary.) */
 181         local_irq_save(flags);
 182
 183         /* Without %1 it gets unexplicably wrong */
 184         asm volatile("getcfg    %3, 0, %0\n\t"
 185                         "or     %0, %2, %0\n\t"
 186                         "putcfg %3, 0, %0\n\t"
 187                         "synci"
 188                         : "=&r" (data)
 189                         : "0" (data), "r" (flag), "r" (addr));
 190
 191         local_irq_restore(flags);
 192 }
 193
 194 static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
 195 {
 196         /* Invalidate range of addresses [start,end] from the I-cache, where
 197          * the addresses lie in the kernel superpage. */
 198
 199         unsigned long long ullend, addr, aligned_start;
 200 #if (NEFF == 32)
 201         aligned_start = (unsigned long long)(signed long long)(signed long) start;
 202 #else
 203 #error "NEFF != 32"
 204 #endif
 205         aligned_start &= L1_CACHE_ALIGN_MASK;
 206         addr = aligned_start;
 207 #if (NEFF == 32)
 208         ullend = (unsigned long long) (signed long long) (signed long) end;
 209 #else
 210 #error "NEFF != 32"
 211 #endif
 212         while (addr <= ullend) {
 213                 asm __volatile__ ("icbi %0, 0" : : "r" (addr));
 214                 addr += L1_CACHE_BYTES;
 215         }
 216 }
 217
 218 static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
 219 {
 220         /* If we get called, we know that vma->vm_flags contains VM_EXEC.
 221            Also, eaddr is page-aligned. */
 222
 223         unsigned long long addr, end_addr;
 224         unsigned long flags = 0;
 225         unsigned long running_asid, vma_asid;
 226         addr = eaddr;
 227         end_addr = addr + PAGE_SIZE;
 228
 229         /* Check whether we can use the current ASID for the I-cache
 230            invalidation.  For example, if we're called via
 231            access_process_vm->flush_cache_page->here, (e.g. when reading from
 232            /proc), 'running_asid' will be that of the reader, not of the
 233            victim.
 234
 235            Also, note the risk that we might get pre-empted between the ASID
 236            compare and blocking IRQs, and before we regain control, the
 237            pid->ASID mapping changes.  However, the whole cache will get
 238            invalidated when the mapping is renewed, so the worst that can
 239            happen is that the loop below ends up invalidating somebody else's
 240            cache entries.
 241         */
 242
 243         running_asid = get_asid();
 244         vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
 245         if (running_asid != vma_asid) {
 246                 local_irq_save(flags);
 247                 switch_and_save_asid(vma_asid);
 248         }
 249         while (addr < end_addr) {
 250                 /* Worth unrolling a little */
 251                 asm __volatile__("icbi %0,  0" : : "r" (addr));
 252                 asm __volatile__("icbi %0, 32" : : "r" (addr));
 253                 asm __volatile__("icbi %0, 64" : : "r" (addr));
 254                 asm __volatile__("icbi %0, 96" : : "r" (addr));
 255                 addr += 128;
 256         }
 257         if (running_asid != vma_asid) {
 258                 switch_and_save_asid(running_asid);
 259                 local_irq_restore(flags);
 260         }
 261 }
 262
 263 /****************************************************************************/
 264
 265 static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
 266                           unsigned long start, unsigned long end)
 267 {
 268         /* Used for invalidating big chunks of I-cache, i.e. assume the range
 269            is whole pages.  If 'start' or 'end' is not page aligned, the code
 270            is conservative and invalidates to the ends of the enclosing pages.
 271            This is functionally OK, just a performance loss. */
 272
 273         /* See the comments below in sh64_dcache_purge_user_range() regarding
 274            the choice of algorithm.  However, for the I-cache option (2) isn't
 275            available because there are no physical tags so aliases can't be
 276            resolved.  The icbi instruction has to be used through the user
 277            mapping.   Because icbi is cheaper than ocbp on a cache hit, it
 278            would be cheaper to use the selective code for a large range than is
 279            possible with the D-cache.  Just assume 64 for now as a working
 280            figure.
 281            */
 282
 283         int n_pages;
 284
 285         if (!mm) return;
 286
 287         n_pages = ((end - start) >> PAGE_SHIFT);
 288         if (n_pages >= 64) {
 289                 sh64_icache_inv_all();
 290         } else {
 291                 unsigned long aligned_start;
 292                 unsigned long eaddr;
 293                 unsigned long after_last_page_start;
 294                 unsigned long mm_asid, current_asid;
 295                 unsigned long long flags = 0ULL;
 296
 297                 mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
 298                 current_asid = get_asid();
 299
 300                 if (mm_asid != current_asid) {
 301                         /* Switch ASID and run the invalidate loop under cli */
 302                         local_irq_save(flags);
 303                         switch_and_save_asid(mm_asid);
 304                 }
 305
 306                 aligned_start = start & PAGE_MASK;
 307                 after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);
 308
 309                 while (aligned_start < after_last_page_start) {
 310                         struct vm_area_struct *vma;
 311                         unsigned long vma_end;
 312                         vma = find_vma(mm, aligned_start);
 313                         if (!vma || (aligned_start <= vma->vm_end)) {
 314                                 /* Avoid getting stuck in an error condition */
 315                                 aligned_start += PAGE_SIZE;
 316                                 continue;
 317                         }
 318                         vma_end = vma->vm_end;
 319                         if (vma->vm_flags & VM_EXEC) {
 320                                 /* Executable */
 321                                 eaddr = aligned_start;
 322                                 while (eaddr < vma_end) {
 323                                         sh64_icache_inv_user_page(vma, eaddr);
 324                                         eaddr += PAGE_SIZE;
 325                                 }
 326                         }
 327                         aligned_start = vma->vm_end; /* Skip to start of next region */
 328                 }
 329                 if (mm_asid != current_asid) {
 330                         switch_and_save_asid(current_asid);
 331                         local_irq_restore(flags);
 332                 }
 333         }
 334 }
 335
 336 static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
 337                                                 unsigned long start, int len)
 338 {
 339
 340         /* Invalidate a small range of user context I-cache, not necessarily
 341            page (or even cache-line) aligned. */
 342
 343         unsigned long long eaddr = start;
 344         unsigned long long eaddr_end = start + len;
 345         unsigned long current_asid, mm_asid;
 346         unsigned long long flags;
 347         unsigned long long epage_start;
 348
 349         /* Since this is used inside ptrace, the ASID in the mm context
 350            typically won't match current_asid.  We'll have to switch ASID to do
 351            this.  For safety, and given that the range will be small, do all
 352            this under cli.
 353
 354            Note, there is a hazard that the ASID in mm->context is no longer
 355            actually associated with mm, i.e. if the mm->context has started a
 356            new cycle since mm was last active.  However, this is just a
 357            performance issue: all that happens is that we invalidate lines
 358            belonging to another mm, so the owning process has to refill them
 359            when that mm goes live again.  mm itself can't have any cache
 360            entries because there will have been a flush_cache_all when the new
 361            mm->context cycle started. */
 362
 363         /* Align to start of cache line.  Otherwise, suppose len==8 and start
 364            was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
 365         eaddr = start & L1_CACHE_ALIGN_MASK;
 366         eaddr_end = start + len;
 367
 368         local_irq_save(flags);
 369         mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
 370         current_asid = switch_and_save_asid(mm_asid);
 371
 372         epage_start = eaddr & PAGE_MASK;
 373
 374         while (eaddr < eaddr_end)
 375         {
 376                 asm __volatile__("icbi %0, 0" : : "r" (eaddr));
 377                 eaddr += L1_CACHE_BYTES;
 378         }
 379         switch_and_save_asid(current_asid);
 380         local_irq_restore(flags);
 381 }
 382
 383 static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
 384 {
 385         /* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
 386            cache hit on the virtual tag the instruction ends there, without a
 387            TLB lookup. */
 388
 389         unsigned long long aligned_start;
 390         unsigned long long ull_end;
 391         unsigned long long addr;
 392
 393         ull_end = end;
 394
 395         /* Just invalidate over the range using the natural addresses.  TLB
 396            miss handling will be OK (TBC).  Since it's for the current process,
 397            either we're already in the right ASID context, or the ASIDs have
 398            been recycled since we were last active in which case we might just
 399            invalidate another processes I-cache entries : no worries, just a
 400            performance drop for him. */
 401         aligned_start = start & L1_CACHE_ALIGN_MASK;
 402         addr = aligned_start;
 403         while (addr < ull_end) {
 404                 asm __volatile__ ("icbi %0, 0" : : "r" (addr));
 405                 asm __volatile__ ("nop");
 406                 asm __volatile__ ("nop");
 407                 addr += L1_CACHE_BYTES;
 408         }
 409 }
 410
 411 #endif /* !CONFIG_ICACHE_DISABLED */
 412
 413 /****************************************************************************/
 414
 415 #ifndef CONFIG_DCACHE_DISABLED
 416
 417 /* Buffer used as the target of alloco instructions to purge data from cache
 418    sets by natural eviction. -- RPC */
 419 #define DUMMY_ALLOCO_AREA_SIZE L1_CACHE_SIZE_BYTES + (1024 * 4)
 420 static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
 421
 422 /****************************************************************************/
 423
 424 static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
 425 {
 426         /* Purge all ways in a particular block of sets, specified by the base
 427            set number and number of sets.  Can handle wrap-around, if that's
 428            needed.  */
 429
 430         int dummy_buffer_base_set;
 431         unsigned long long eaddr, eaddr0, eaddr1;
 432         int j;
 433         int set_offset;
 434
 435         dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
 436         set_offset = sets_to_purge_base - dummy_buffer_base_set;
 437
 438         for (j=0; j<n_sets; j++, set_offset++) {
 439                 set_offset &= (cpu_data->dcache.sets - 1);
 440                 eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);
 441
 442                 /* Do one alloco which hits the required set per cache way.  For
 443                    write-back mode, this will purge the #ways resident lines.   There's
 444                    little point unrolling this loop because the allocos stall more if
 445                    they're too close together. */
 446                 eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
 447                 for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
 448                         asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
 449                         asm __volatile__ ("synco"); /* TAKum03020 */
 450                 }
 451
 452                 eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
 453                 for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
 454                         /* Load from each address.  Required because alloco is a NOP if
 455                            the cache is write-through.  Write-through is a config option. */
 456                         if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
 457                                 *(volatile unsigned char *)(int)eaddr;
 458                 }
 459         }
 460
 461         /* Don't use OCBI to invalidate the lines.  That costs cycles directly.
 462            If the dummy block is just left resident, it will naturally get
 463            evicted as required.  */
 464
 465         return;
 466 }
 467
 468 /****************************************************************************/
 469
 470 static void sh64_dcache_purge_all(void)
 471 {
 472         /* Purge the entire contents of the dcache.  The most efficient way to
 473            achieve this is to use alloco instructions on a region of unused
 474            memory equal in size to the cache, thereby causing the current
 475            contents to be discarded by natural eviction.  The alternative,
 476            namely reading every tag, setting up a mapping for the corresponding
 477            page and doing an OCBP for the line, would be much more expensive.
 478            */
 479
 480         sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
 481
 482         return;
 483
 484 }
 485
 486 /****************************************************************************/
 487
 488 static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
 489 {
 490         /* Purge the range of addresses [start,end] from the D-cache.  The
 491            addresses lie in the superpage mapping.  There's no harm if we
 492            overpurge at either end - just a small performance loss. */
 493         unsigned long long ullend, addr, aligned_start;
 494 #if (NEFF == 32)
 495         aligned_start = (unsigned long long)(signed long long)(signed long) start;
 496 #else
 497 #error "NEFF != 32"
 498 #endif
 499         aligned_start &= L1_CACHE_ALIGN_MASK;
 500         addr = aligned_start;
 501 #if (NEFF == 32)
 502         ullend = (unsigned long long) (signed long long) (signed long) end;
 503 #else
 504 #error "NEFF != 32"
 505 #endif
 506         while (addr <= ullend) {
 507                 asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
 508                 addr += L1_CACHE_BYTES;
 509         }
 510         return;
 511 }
 512
 513 /* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
 514    anything else in the kernel */
 515 #define MAGIC_PAGE0_START 0xffffffffec000000ULL
 516
 517 static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
 518 {
 519         /* Purge the physical page 'paddr' from the cache.  It's known that any
 520            cache lines requiring attention have the same page colour as the the
 521            address 'eaddr'.
 522
 523            This relies on the fact that the D-cache matches on physical tags
 524            when no virtual tag matches.  So we create an alias for the original
 525            page and purge through that.  (Alternatively, we could have done
 526            this by switching ASID to match the original mapping and purged
 527            through that, but that involves ASID switching cost + probably a
 528            TLBMISS + refill anyway.)
 529            */
 530
 531         unsigned long long magic_page_start;
 532         unsigned long long magic_eaddr, magic_eaddr_end;
 533
 534         magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);
 535
 536         /* As long as the kernel is not pre-emptible, this doesn't need to be
 537            under cli/sti. */
 538
 539         sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);
 540
 541         magic_eaddr = magic_page_start;
 542         magic_eaddr_end = magic_eaddr + PAGE_SIZE;
 543         while (magic_eaddr < magic_eaddr_end) {
 544                 /* Little point in unrolling this loop - the OCBPs are blocking
 545                    and won't go any quicker (i.e. the loop overhead is parallel
 546                    to part of the OCBP execution.) */
 547                 asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
 548                 magic_eaddr += L1_CACHE_BYTES;
 549         }
 550
 551         sh64_teardown_dtlb_cache_slot();
 552 }
 553
 554 /****************************************************************************/
 555
 556 static void sh64_dcache_purge_phy_page(unsigned long paddr)
 557 {
 558         /* Pure a page given its physical start address, by creating a
 559            temporary 1 page mapping and purging across that.  Even if we know
 560            the virtual address (& vma or mm) of the page, the method here is
 561            more elegant because it avoids issues of coping with page faults on
 562            the purge instructions (i.e. no special-case code required in the
 563            critical path in the TLB miss handling). */
 564
 565         unsigned long long eaddr_start, eaddr, eaddr_end;
 566         int i;
 567
 568         /* As long as the kernel is not pre-emptible, this doesn't need to be
 569            under cli/sti. */
 570
 571         eaddr_start = MAGIC_PAGE0_START;
 572         for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
 573                 sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);
 574
 575                 eaddr = eaddr_start;
 576                 eaddr_end = eaddr + PAGE_SIZE;
 577                 while (eaddr < eaddr_end) {
 578                         asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
 579                         eaddr += L1_CACHE_BYTES;
 580                 }
 581
 582                 sh64_teardown_dtlb_cache_slot();
 583                 eaddr_start += PAGE_SIZE;
 584         }
 585 }
 586
 587 static void sh64_dcache_purge_user_page(struct mm_struct *mm, unsigned long eaddr)
 588 {
 589         pgd_t *pgd;
 590         pmd_t *pmd;
 591         pte_t *pte;
 592         pte_t entry;
 593         unsigned long paddr;
 594
 595         /* NOTE : all the callers of this have mm->page_table_lock held, so the
 596            following page table traversal is safe even on SMP/pre-emptible. */
 597
 598         if (!mm) return; /* No way to find physical address of page */
 599         pgd = pgd_offset(mm, eaddr);
 600         if (pgd_bad(*pgd)) return;
 601
 602         pmd = pmd_offset(pgd, eaddr);
 603         if (pmd_none(*pmd) || pmd_bad(*pmd)) return;
 604
 605         pte = pte_offset_kernel(pmd, eaddr);
 606         entry = *pte;
 607         if (pte_none(entry) || !pte_present(entry)) return;
 608
 609         paddr = pte_val(entry) & PAGE_MASK;
 610
 611         sh64_dcache_purge_coloured_phy_page(paddr, eaddr);
 612
 613 }
 614 /****************************************************************************/
 615
 616 static void sh64_dcache_purge_user_range(struct mm_struct *mm,
 617                           unsigned long start, unsigned long end)
 618 {
 619         /* There are at least 5 choices for the implementation of this, with
 620            pros (+), cons(-), comments(*):
 621
 622            1. ocbp each line in the range through the original user's ASID
 623               + no lines spuriously evicted
 624               - tlbmiss handling (must either handle faults on demand => extra
 625                 special-case code in tlbmiss critical path), or map the page in
 626                 advance (=> flush_tlb_range in advance to avoid multiple hits)
 627               - ASID switching
 628               - expensive for large ranges
 629
 630            2. temporarily map each page in the range to a special effective
 631               address and ocbp through the temporary mapping; relies on the
 632               fact that SH-5 OCB* always do TLB lookup and match on ptags (they
 633               never look at the etags)
 634               + no spurious evictions
 635               - expensive for large ranges
 636               * surely cheaper than (1)
 637
 638            3. walk all the lines in the cache, check the tags, if a match
 639               occurs create a page mapping to ocbp the line through
 640               + no spurious evictions
 641               - tag inspection overhead
 642               - (especially for small ranges)
 643               - potential cost of setting up/tearing down page mapping for
 644                 every line that matches the range
 645               * cost partly independent of range size
 646
 647            4. walk all the lines in the cache, check the tags, if a match
 648               occurs use 4 * alloco to purge the line (+3 other probably
 649               innocent victims) by natural eviction
 650               + no tlb mapping overheads
 651               - spurious evictions
 652               - tag inspection overhead
 653
 654            5. implement like flush_cache_all
 655               + no tag inspection overhead
 656               - spurious evictions
 657               - bad for small ranges
 658
 659            (1) can be ruled out as more expensive than (2).  (2) appears best
 660            for small ranges.  The choice between (3), (4) and (5) for large
 661            ranges and the range size for the large/small boundary need
 662            benchmarking to determine.
 663
 664            For now use approach (2) for small ranges and (5) for large ones.
 665
 666            */
 667
 668         int n_pages;
 669
 670         n_pages = ((end - start) >> PAGE_SHIFT);
 671         if (n_pages >= 64) {
 672 #if 1
 673                 sh64_dcache_purge_all();
 674 #else
 675                 unsigned long long set, way;
 676                 unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
 677                 for (set = 0; set < cpu_data->dcache.sets; set++) {
 678                         unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
 679                         for (way = 0; way < cpu_data->dcache.ways; way++) {
 680                                 unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
 681                                 unsigned long long tag0;
 682                                 unsigned long line_valid;
 683
 684                                 asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
 685                                 line_valid = tag0 & SH_CACHE_VALID;
 686                                 if (line_valid) {
 687                                         unsigned long cache_asid;
 688                                         unsigned long epn;
 689
 690                                         cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
 691                                         /* The next line needs some
 692                                            explanation.  The virtual tags
 693                                            encode bits [31:13] of the virtual
 694                                            address, bit [12] of the 'tag' being
 695                                            implied by the cache set index. */
 696                                         epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);
 697
 698                                         if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
 699                                                 /* TODO : could optimise this
 700                                                    call by batching multiple
 701                                                    adjacent sets together. */
 702                                                 sh64_dcache_purge_sets(set, 1);
 703                                                 break; /* Don't waste time inspecting other ways for this set */
 704                                         }
 705                                 }
 706                         }
 707                 }
 708 #endif
 709         } else {
 710                 /* 'Small' range */
 711                 unsigned long aligned_start;
 712                 unsigned long eaddr;
 713                 unsigned long last_page_start;
 714
 715                 aligned_start = start & PAGE_MASK;
 716                 /* 'end' is 1 byte beyond the end of the range */
 717                 last_page_start = (end - 1) & PAGE_MASK;
 718
 719                 eaddr = aligned_start;
 720                 while (eaddr <= last_page_start) {
 721                         sh64_dcache_purge_user_page(mm, eaddr);
 722                         eaddr += PAGE_SIZE;
 723                 }
 724         }
 725         return;
 726 }
 727
 728 static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
 729 {
 730         unsigned long long aligned_start;
 731         unsigned long long ull_end;
 732         unsigned long long addr;
 733
 734         ull_end = end;
 735
 736         /* Just wback over the range using the natural addresses.  TLB miss
 737            handling will be OK (TBC) : the range has just been written to by
 738            the signal frame setup code, so the PTEs must exist.
 739
 740            Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
 741            it doesn't matter, even if the pid->ASID mapping changes whilst
 742            we're away.  In that case the cache will have been flushed when the
 743            mapping was renewed.  So the writebacks below will be nugatory (and
 744            we'll doubtless have to fault the TLB entry/ies in again with the
 745            new ASID), but it's a rare case.
 746            */
 747         aligned_start = start & L1_CACHE_ALIGN_MASK;
 748         addr = aligned_start;
 749         while (addr < ull_end) {
 750                 asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
 751                 addr += L1_CACHE_BYTES;
 752         }
 753 }
 754
 755 /****************************************************************************/
 756
 757 /* These *MUST* lie in an area of virtual address space that's otherwise unused. */
 758 #define UNIQUE_EADDR_START 0xe0000000UL
 759 #define UNIQUE_EADDR_END   0xe8000000UL
 760
 761 static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
 762 {
 763         /* Given a physical address paddr, and a user virtual address
 764            user_eaddr which will eventually be mapped to it, create a one-off
 765            kernel-private eaddr mapped to the same paddr.  This is used for
 766            creating special destination pages for copy_user_page and
 767            clear_user_page */
 768
 769         static unsigned long current_pointer = UNIQUE_EADDR_START;
 770         unsigned long coloured_pointer;
 771
 772         if (current_pointer == UNIQUE_EADDR_END) {
 773                 sh64_dcache_purge_all();
 774                 current_pointer = UNIQUE_EADDR_START;
 775         }
 776
 777         coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
 778         sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);
 779
 780         current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);
 781
 782         return coloured_pointer;
 783 }
 784
 785 /****************************************************************************/
 786
 787 static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
 788 {
 789         void *coloured_to;
 790
 791         /* Discard any existing cache entries of the wrong colour.  These are
 792            present quite often, if the kernel has recently used the page
 793            internally, then given it up, then it's been allocated to the user.
 794            */
 795         sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
 796
 797         coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
 798         sh64_page_copy(from, coloured_to);
 799
 800         sh64_teardown_dtlb_cache_slot();
 801 }
 802
 803 static void sh64_clear_user_page_coloured(void *to, unsigned long address)
 804 {
 805         void *coloured_to;
 806
 807         /* Discard any existing kernel-originated lines of the wrong colour (as
 808            above) */
 809         sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
 810
 811         coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
 812         sh64_page_clear(coloured_to);
 813
 814         sh64_teardown_dtlb_cache_slot();
 815 }
 816
 817 #endif /* !CONFIG_DCACHE_DISABLED */
 818
 819 /****************************************************************************/
 820
 821 /*##########################################################################
 822                             EXTERNALLY CALLABLE API.
 823   ##########################################################################*/
 824
 825 /* These functions are described in Documentation/cachetlb.txt.
 826    Each one of these functions varies in behaviour depending on whether the
 827    I-cache and/or D-cache are configured out.
 828
 829    Note that the Linux term 'flush' corresponds to what is termed 'purge' in
 830    the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
 831    invalidate the cache lines, and 'invalidate' for the I-cache.
 832    */
 833
 834 #undef FLUSH_TRACE
 835
 836 void flush_cache_all(void)
 837 {
 838         /* Invalidate the entire contents of both caches, after writing back to
 839            memory any dirty data from the D-cache. */
 840         sh64_dcache_purge_all();
 841         sh64_icache_inv_all();
 842 }
 843
 844 /****************************************************************************/
 845
 846 void flush_cache_mm(struct mm_struct *mm)
 847 {
 848         /* Invalidate an entire user-address space from both caches, after
 849            writing back dirty data (e.g. for shared mmap etc). */
 850
 851         /* This could be coded selectively by inspecting all the tags then
 852            doing 4*alloco on any set containing a match (as for
 853            flush_cache_range), but fork/exit/execve (where this is called from)
 854            are expensive anyway. */
 855
 856         /* Have to do a purge here, despite the comments re I-cache below.
 857            There could be odd-coloured dirty data associated with the mm still
 858            in the cache - if this gets written out through natural eviction
 859            after the kernel has reused the page there will be chaos.
 860            */
 861
 862         sh64_dcache_purge_all();
 863
 864         /* The mm being torn down won't ever be active again, so any Icache
 865            lines tagged with its ASID won't be visible for the rest of the
 866            lifetime of this ASID cycle.  Before the ASID gets reused, there
 867            will be a flush_cache_all.  Hence we don't need to touch the
 868            I-cache.  This is similar to the lack of action needed in
 869            flush_tlb_mm - see fault.c. */
 870 }
 871
 872 /****************************************************************************/
 873
 874 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 875                        unsigned long end)
 876 {
 877         struct mm_struct *mm = vma->vm_mm;
 878
 879         /* Invalidate (from both caches) the range [start,end) of virtual
 880            addresses from the user address space specified by mm, after writing
 881            back any dirty data.
 882
 883            Note(1), 'end' is 1 byte beyond the end of the range to flush.
 884
 885            Note(2), this is called with mm->page_table_lock held.*/
 886
 887         sh64_dcache_purge_user_range(mm, start, end);
 888         sh64_icache_inv_user_page_range(mm, start, end);
 889 }
 890
 891 /****************************************************************************/
 892
 893 void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
 894 {
 895         /* Invalidate any entries in either cache for the vma within the user
 896            address space vma->vm_mm for the page starting at virtual address
 897            'eaddr'.   This seems to be used primarily in breaking COW.  Note,
 898            the I-cache must be searched too in case the page in question is
 899            both writable and being executed from (e.g. stack trampolines.)
 900
 901            Note(1), this is called with mm->page_table_lock held.
 902            */
 903
 904         sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
 905
 906         if (vma->vm_flags & VM_EXEC) {
 907                 sh64_icache_inv_user_page(vma, eaddr);
 908         }
 909 }
 910
 911 /****************************************************************************/
 912
 913 #ifndef CONFIG_DCACHE_DISABLED
 914
 915 void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
 916 {
 917         /* 'from' and 'to' are kernel virtual addresses (within the superpage
 918            mapping of the physical RAM).  'address' is the user virtual address
 919            where the copy 'to' will be mapped after.  This allows a custom
 920            mapping to be used to ensure that the new copy is placed in the
 921            right cache sets for the user to see it without having to bounce it
 922            out via memory.  Note however : the call to flush_page_to_ram in
 923            (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
 924            very important case!
 925
 926            TBD : can we guarantee that on every call, any cache entries for
 927            'from' are in the same colour sets as 'address' also?  i.e. is this
 928            always used just to deal with COW?  (I suspect not). */
 929
 930         /* There are two possibilities here for when the page 'from' was last accessed:
 931            * by the kernel : this is OK, no purge required.
 932            * by the/a user (e.g. for break_COW) : need to purge.
 933
 934            If the potential user mapping at 'address' is the same colour as
 935            'from' there is no need to purge any cache lines from the 'from'
 936            page mapped into cache sets of colour 'address'.  (The copy will be
 937            accessing the page through 'from').
 938            */
 939
 940         if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
 941                 sh64_dcache_purge_coloured_phy_page(__pa(from), address);
 942         }
 943
 944         if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
 945                 /* No synonym problem on destination */
 946                 sh64_page_copy(from, to);
 947         } else {
 948                 sh64_copy_user_page_coloured(to, from, address);
 949         }
 950
 951         /* Note, don't need to flush 'from' page from the cache again - it's
 952            done anyway by the generic code */
 953 }
 954
 955 void clear_user_page(void *to, unsigned long address, struct page *page)
 956 {
 957         /* 'to' is a kernel virtual address (within the superpage
 958            mapping of the physical RAM).  'address' is the user virtual address
 959            where the 'to' page will be mapped after.  This allows a custom
 960            mapping to be used to ensure that the new copy is placed in the
 961            right cache sets for the user to see it without having to bounce it
 962            out via memory.
 963         */
 964
 965         if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
 966                 /* No synonym problem on destination */
 967                 sh64_page_clear(to);
 968         } else {
 969                 sh64_clear_user_page_coloured(to, address);
 970         }
 971 }
 972
 973 #endif /* !CONFIG_DCACHE_DISABLED */
 974
 975 /****************************************************************************/
 976
 977 void flush_dcache_page(struct page *page)
 978 {
 979         sh64_dcache_purge_phy_page(page_to_phys(page));
 980         wmb();
 981 }
 982
 983 /****************************************************************************/
 984
 985 void flush_icache_range(unsigned long start, unsigned long end)
 986 {
 987         /* Flush the range [start,end] of kernel virtual adddress space from
 988            the I-cache.  The corresponding range must be purged from the
 989            D-cache also because the SH-5 doesn't have cache snooping between
 990            the caches.  The addresses will be visible through the superpage
 991            mapping, therefore it's guaranteed that there no cache entries for
 992            the range in cache sets of the wrong colour.
 993
 994            Primarily used for cohering the I-cache after a module has
 995            been loaded.  */
 996
 997         /* We also make sure to purge the same range from the D-cache since
 998            flush_page_to_ram() won't be doing this for us! */
 999
1000         sh64_dcache_purge_kernel_range(start, end);
1001         wmb();
1002         sh64_icache_inv_kernel_range(start, end);
1003 }
1004
1005 /****************************************************************************/
1006
1007 void flush_icache_user_range(struct vm_area_struct *vma,
1008                         struct page *page, unsigned long addr, int len)
1009 {
1010         /* Flush the range of user (defined by vma->vm_mm) address space
1011            starting at 'addr' for 'len' bytes from the cache.  The range does
1012            not straddle a page boundary, the unique physical page containing
1013            the range is 'page'.  This seems to be used mainly for invalidating
1014            an address range following a poke into the program text through the
1015            ptrace() call from another process (e.g. for BRK instruction
1016            insertion). */
1017
1018         sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
1019         mb();
1020
1021         if (vma->vm_flags & VM_EXEC) {
1022                 sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
1023         }
1024 }
1025
1026 /*##########################################################################
1027                         ARCH/SH64 PRIVATE CALLABLE API.
1028   ##########################################################################*/
1029
/*
 * Make the address range [start,end) of the current process executable:
 * write the data back from the D-cache, then invalidate the matching
 * region of the I-cache.  Used to flush signal trampolines on the stack.
 */
void flush_cache_sigtramp(unsigned long start, unsigned long end)
{
	sh64_dcache_wback_current_user_range(start, end);
	/* Write barrier between the write-back and the I-cache invalidate. */
	wmb();
	sh64_icache_inv_current_user_range(start, end);
}
1041