|
|
- 'use strict'
-
- const BB = require('bluebird')
-
- const contentPath = require('./content/path')
- const figgyPudding = require('figgy-pudding')
- const finished = BB.promisify(require('mississippi').finished)
- const fixOwner = require('./util/fix-owner')
- const fs = require('graceful-fs')
- const glob = BB.promisify(require('glob'))
- const index = require('./entry-index')
- const path = require('path')
- const rimraf = BB.promisify(require('rimraf'))
- const ssri = require('ssri')
-
- BB.promisifyAll(fs)
-
// Option schema accepted by `verify`:
//   - concurrency: passed as the `concurrency` limit to the parallel maps
//     used while sweeping content and rebuilding buckets (defaults to 20).
//   - filter: optional predicate called with an index entry; entries for
//     which it returns a falsy value are dropped during verification.
//   - log: logger exposing `silly(...)`; defaults to a no-op logger.
const VerifyOpts = figgyPudding({
  concurrency: { default: 20 },
  filter: {},
  log: { default: { silly () {} } }
})
-
- module.exports = verify
/**
 * Runs every verification step, in order, against the cache at `cache`.
 *
 * Each step is called with `(cache, opts)`; whatever object it resolves to
 * is merged key-by-key into a shared stats object, and the wall-clock
 * duration of the step is recorded under `stats.runTime[<step name>]`.
 *
 * @param {string} cache - path of the cache directory to verify
 * @param {object} [opts] - options, normalized through `VerifyOpts`
 * @returns {Promise<object>} resolves to the accumulated stats, including
 *   `runTime.total` (milliseconds between `startTime` and `endTime`)
 */
function verify (cache, opts) {
  opts = VerifyOpts(opts)
  opts.log.silly('verify', 'verifying cache at', cache)

  const steps = [
    markStartTime,
    fixPerms,
    garbageCollect,
    rebuildIndex,
    cleanTmp,
    writeVerifile,
    markEndTime
  ]

  // Reducer: run one step, fold its result into `stats`, time it.
  const runStep = (stats, step, i) => {
    const label = step.name || `step #${i}`
    const start = new Date()
    return BB.resolve(step(cache, opts)).then(result => {
      if (result) {
        for (const key of Object.keys(result)) {
          stats[key] = result[key]
        }
      }
      const end = new Date()
      if (!stats.runTime) { stats.runTime = {} }
      stats.runTime[label] = end - start
      return stats
    })
  }

  const reportTotal = stats => {
    stats.runTime.total = stats.endTime - stats.startTime
    opts.log.silly('verify', 'verification finished for', cache, 'in', `${stats.runTime.total}ms`)
  }

  return BB.reduce(steps, runStep, {}).tap(reportTotal)
}
-
/**
 * First pipeline step: stamps the moment verification began.
 *
 * @returns {{startTime: Date}} the current time, merged into the stats
 */
function markStartTime (cache, opts) {
  const startTime = new Date()
  return { startTime }
}
-
/**
 * Last pipeline step: stamps the moment verification ended.
 *
 * @returns {{endTime: Date}} the current time, merged into the stats
 */
function markEndTime (cache, opts) {
  const endTime = new Date()
  return { endTime }
}
-
/**
 * Pipeline step: runs `fixOwner.mkdirfix` and then `fixOwner.chownr` on the
 * cache directory itself.
 *
 * @param {string} cache - path of the cache directory
 * @param {object} opts - normalized verify options (only `log` is used)
 * @returns {Promise<null>} always resolves to `null`, so no stats are merged
 */
function fixPerms (cache, opts) {
  opts.log.silly('verify', 'fixing cache permissions')
  // TODO - fix file permissions too
  const chownCache = () => fixOwner.chownr(cache, cache)
  const discardResult = () => null
  return fixOwner.mkdirfix(cache, cache)
    .then(chownCache)
    .then(discardResult)
}
-
// Naive mark-and-sweep garbage collector for cache content.
//
// Mark phase:
//   1. Stream every index entry, skipping those rejected by `opts.filter`.
//   2. Record the integrity string of each surviving entry as "live".
// Sweep phase:
//   3. List every file under the `content-vX/` directory.
//   4. Live content has its checksum verified (and is removed when the
//      check fails).
//   5. Content that no entry refers to is rimraf'd.
//
// Resolves to counters describing what was kept and what was reclaimed.
function garbageCollect (cache, opts) {
  opts.log.silly('verify', 'garbage collecting content')

  const liveContent = new Set()
  const indexStream = index.lsStream(cache)
  indexStream.on('data', entry => {
    const keep = !opts.filter || opts.filter(entry)
    if (keep) {
      liveContent.add(entry.integrity.toString())
    }
  })

  const stats = {
    verifiedContent: 0,
    reclaimedCount: 0,
    reclaimedSize: 0,
    badContentCount: 0,
    keptSize: 0
  }

  // Handles a single content file: verify it if live, delete it otherwise.
  const sweep = file => {
    // The content path ends in <algorithm>/<xx>/<yy>/<rest-of-digest>.
    const parts = file.split(/[/\\]/)
    const algorithm = parts[parts.length - 4]
    const hexDigest = parts.slice(parts.length - 3).join('')
    const integrity = ssri.fromHex(hexDigest, algorithm)

    if (!liveContent.has(integrity.toString())) {
      // No entries refer to this content. We can delete.
      stats.reclaimedCount++
      return fs.statAsync(file).then(fileStat => {
        return rimraf(file).then(() => {
          stats.reclaimedSize += fileStat.size
          return stats
        })
      })
    }

    return verifyContent(file, integrity).then(info => {
      if (info.valid) {
        stats.verifiedContent++
        stats.keptSize += info.size
      } else {
        stats.reclaimedCount++
        stats.badContentCount++
        stats.reclaimedSize += info.size
      }
      return stats
    })
  }

  return finished(indexStream)
    .then(() => {
      const pattern = path.join(contentPath._contentDir(cache), '**')
      return glob(pattern, {
        follow: false,
        nodir: true,
        nosort: true
      })
    })
    .then(files => BB.map(files, sweep, { concurrency: opts.concurrency }))
    .then(() => stats)
}
-
/**
 * Checks one content file against its expected integrity.
 *
 * A file that fails the check with `EINTEGRITY` is removed and reported as
 * invalid; any other checking error is rethrown. A file that does not exist
 * is reported as invalid with size 0.
 *
 * @param {string} filepath - path of the content file
 * @param {object} sri - expected integrity, as accepted by `ssri.checkStream`
 * @returns {Promise<{size: number, valid: boolean}>}
 */
function verifyContent (filepath, sri) {
  const reportMissing = () => ({ size: 0, valid: false })

  return fs.statAsync(filepath).then(stat => {
    const contentInfo = { size: stat.size, valid: true }

    const discardCorrupt = err => {
      if (err.code !== 'EINTEGRITY') { throw err }
      return rimraf(filepath).then(() => {
        contentInfo.valid = false
      })
    }

    const contentStream = fs.createReadStream(filepath)
    return ssri.checkStream(contentStream, sri)
      .catch(discardCorrupt)
      .then(() => contentInfo)
  }).catch({ code: 'ENOENT' }, reportMissing)
}
-
/**
 * Pipeline step: regroups all index entries by bucket and rewrites every
 * bucket through `rebuildBucket`.
 *
 * Entries rejected by `opts.filter` are counted in `rejectedEntries` and
 * left out of their bucket, but the bucket itself is still registered so
 * that its file gets rewritten without them.
 *
 * @param {string} cache - path of the cache directory
 * @param {object} opts - normalized verify options (`filter`, `concurrency`, `log`)
 * @returns {Promise<{missingContent: number, rejectedEntries: number, totalEntries: number}>}
 */
function rebuildIndex (cache, opts) {
  opts.log.silly('verify', 'rebuilding index')
  return index.ls(cache).then(entries => {
    const stats = {
      missingContent: 0,
      rejectedEntries: 0,
      totalEntries: 0
    }
    const buckets = {}
    // Iterate own keys via Object.keys rather than `entries.hasOwnProperty`:
    // cache keys are arbitrary strings, so an entry stored under the key
    // "hasOwnProperty" would shadow the method and make the call throw.
    for (const key of Object.keys(entries)) {
      const entry = entries[key]
      const hashed = index._hashKey(key)
      const excluded = opts.filter && !opts.filter(entry)
      if (excluded) { stats.rejectedEntries++ }
      if (!buckets[hashed]) {
        // First key seen for this bucket decides the bucket's file path.
        buckets[hashed] = []
        buckets[hashed]._path = index._bucketPath(cache, key)
      }
      if (!excluded) {
        buckets[hashed].push(entry)
      }
    }
    return BB.map(Object.keys(buckets), hashed => {
      return rebuildBucket(cache, buckets[hashed], stats, opts)
    }, { concurrency: opts.concurrency }).then(() => stats)
  })
}
-
/**
 * Empties one bucket file and re-inserts its entries one at a time.
 *
 * An entry is re-inserted only if its content file can be stat'ed; when an
 * `ENOENT` error occurs instead, the entry is counted in both
 * `rejectedEntries` and `missingContent`.
 *
 * @param {string} cache - path of the cache directory
 * @param {Array} bucket - entries of the bucket, with the bucket file path in `bucket._path`
 * @param {object} stats - shared counters, mutated in place
 * @param {object} opts - normalized verify options (not read here)
 * @returns {Promise} settles once every entry has been processed
 */
function rebuildBucket (cache, bucket, stats, opts) {
  const countMissing = () => {
    stats.rejectedEntries++
    stats.missingContent++
  }

  const reinsert = entry => {
    const content = contentPath(cache, entry.integrity)
    return fs.statAsync(content).then(() => {
      const insertOpts = {
        metadata: entry.metadata,
        size: entry.size
      }
      return index.insert(cache, entry.key, entry.integrity, insertOpts)
        .then(() => { stats.totalEntries++ })
    }).catch({ code: 'ENOENT' }, countMissing)
  }

  return fs.truncateAsync(bucket._path).then(() => {
    // This needs to be serialized because cacache explicitly
    // lets very racy bucket conflicts clobber each other.
    return BB.mapSeries(bucket, entry => reinsert(entry))
  })
}
-
/**
 * Pipeline step: removes the `tmp` directory inside the cache.
 *
 * @param {string} cache - path of the cache directory
 * @param {object} opts - normalized verify options (only `log` is used)
 * @returns {Promise} whatever `rimraf` resolves to
 */
function cleanTmp (cache, opts) {
  opts.log.silly('verify', 'cleaning tmp directory')
  const tmpDir = path.join(cache, 'tmp')
  return rimraf(tmpDir)
}
-
/**
 * Pipeline step: records the time of this verification in the
 * `_lastverified` file inside the cache, then fixes that file's ownership.
 *
 * @param {string} cache - path of the cache directory
 * @param {object} opts - normalized verify options (only `log` is used)
 * @returns {Promise} settles once the file has been written and chowned;
 *   rejects if either the write or the chown fails
 */
function writeVerifile (cache, opts) {
  const verifile = path.join(cache, '_lastverified')
  opts.log.silly('verify', 'writing verifile to ' + verifile)
  // The chown must run after the asynchronous write has settled. A
  // synchronous try/finally around the returned promise would run it
  // immediately, before the file is guaranteed to exist.
  return fs.writeFileAsync(verifile, String(Date.now())).finally(() => {
    fixOwner.chownr.sync(cache, verifile)
  })
}
-
- module.exports.lastRun = lastRun
/**
 * Reads the time of the last recorded verification from the
 * `_lastverified` file inside the cache.
 *
 * @param {string} cache - path of the cache directory
 * @returns {Promise<Date>} the stored millisecond timestamp as a Date;
 *   rejects if the file cannot be read
 */
function lastRun (cache) {
  const verifile = path.join(cache, '_lastverified')
  const toDate = data => new Date(+data)
  return fs.readFileAsync(verifile, 'utf8').then(toDate)
}
|