Keys in threes...: Testing SyntaxHighlighter (videoPodcastGrabber.groovy)

I've set up my blog to use SyntaxHighlighter 2.0. Rather than just test it out with a "Hello World," here's a Groovy script I wrote a couple of years ago, after attending JavaOne 2007, to download a bunch of conference videos from a video podcast feed:

#!/usr/bin/env groovy

/**
 * Downloads a remote file to a local file.
 *
 * <p>If a local file of exactly the expected size already exists, nothing is
 * downloaded. A larger existing file is left untouched and reported as a
 * failure; a smaller one is treated as an incomplete download, deleted and
 * fetched again from the start.
 *
 * <p>Only {@link FileNotFoundException} raised while opening or copying the
 * remote stream is turned into a failure result; any other exception
 * propagates to the caller, after both streams have been closed.
 *
 * @param url URL (as a string) representing remote file
 * @param localPath full path where file is to be stored locally
 * @param expectedSize the expected size of the file in bytes
 * @return two-item list:<ol>
 *      <li>true if successful, false if error occurred</li>
 *      <li>message describing success or failure</li>
 *      </ol>
 */
def downloadFile(url, localPath, Long expectedSize) {
    def file = new File(localPath)

    if (file.exists()) {
        def existingSize = file.length()

        // If file exists and it's exactly the expected size, then we're done with this file.
        if (existingSize == expectedSize) {
            return [true, 'File with expected name and size already exists.']
        }

        // If file already exists and it's bigger than expected, that's just weird.  Give up.
        if (existingSize > expectedSize) {
            return [false, "File ${localPath} exists with ${existingSize} bytes, bigger than expected ${expectedSize}; skipping."]
        }

        // Otherwise it's smaller than expected: assume an incomplete download and remove it.
        // TODO Switch to use resumable download instead of starting over.
        file.delete()
    }

    def isOk = false
    def message = ''

    // Opened outside the try block on purpose: failing to create the local file
    // is not a download failure and is left to propagate, as before.
    def out = new BufferedOutputStream(new FileOutputStream(localPath))
    def input = null
    try {
        input = new URL(url).openStream()
        out << input
        // Make sure every buffered byte has reached the file before measuring it.
        out.flush()

        def actualSize = file.length()
        if (actualSize == expectedSize) {
            isOk = true
            message = 'Success.'
        } else {
            message = "Got ${actualSize} bytes but expected ${expectedSize}!"
        }
    } catch (FileNotFoundException e) {
        message = "Could not download ${url}"
    } finally {
        // Close both streams on every path, including unexpected exceptions;
        // the nested finally guarantees the output is closed even if closing
        // the input throws.
        try {
            input?.close()
        } finally {
            out.close()
        }
    }

    return [isOk, message]
}

/**
 * One entry of the video podcast feed, together with the local path its
 * media file is stored under.
 */
class VideoPodcastItem {
    String title
    String externalLink
    String filePath
    String description
    Date pubDate
    String guid

    /**
     * Renders this item as a three-line HTML fragment: a linked title, the
     * description, and a link to the media file labelled with the text after
     * the last '.' in {@code filePath}. Values are inserted as-is, without
     * HTML escaping.
     */
    String toString() {
        def extension = filePath.tokenize('.')[-1]
        def fragmentLines = [
            "Title: <a href=\"${externalLink}\">${title}</a><br>",
            "Description: ${description}<br>",
            "Media: [<a href=\"${filePath}\">${extension}</a>]"
        ]
        return fragmentLines.join('\n')
    }
}

// Define some settings
// Runs of non-word characters in an item title are replaced with '_' in the file name.
def nonSafeRegex = /[^\w]+/
// The guid comes straight from the feed; path separators in it would let the
// local file name point outside destDir (or into a non-existent directory).
def pathSeparatorRegex = /[\/\\]+/
def fileExtension = '.mp4'
def destDir = '.'
def feedUrl = 'http://public-xml.feedroom.com/public_rss/sun_podcast_rss.xml?channel_id=179d1f683cee84e1c425b1aa164b652b7602b131'
// Only feed items whose enclosure is declared as MP4 video are downloaded.
def includedItems = { it.enclosure.@type == 'video/mp4' }

// Fetch and parse XML w/ Groovy's XmlSlurper class
def rssNode = new XmlSlurper().parse(feedUrl)

// Traverse node tree to get list of MP4 video attachments
assert rssNode.channel.size() == 1
def channelNode = rssNode.channel[0]
assert channelNode.item.size() > 0
def items = channelNode.item
println "got ${items.size()} items"
def itemsWithVideo = items.grep(includedItems)
assert itemsWithVideo.size() > 0
println "got ${itemsWithVideo.size()} items with video"

// Download the attached video files.
def successCount = 0
def processedItems = itemsWithVideo.collect{
    // Declared with 'def' so each item stays local to this closure instead of
    // being written to the script binding.
    def item = new VideoPodcastItem()
    item.title = it.title[0].text().trim()
    item.description = it.description[0].text().trim()
    item.externalLink = it.link[0].text().trim()
    def videoUrl = it.enclosure[0].@url.text().trim()
    item.guid = it.guid[0].text().trim()
    Long videoByteLength = Long.parseLong(it.enclosure[0].@length.text().trim())
    // The guid keeps its characters except for path separators, so file names
    // derived from separator-free guids are unchanged.
    def safeGuid = item.guid.replaceAll(pathSeparatorRegex, '_')
    item.filePath = destDir + File.separatorChar + item.title.replaceAll(nonSafeRegex, '_') + '-' + safeGuid + fileExtension
    println "'${item.title}' => '${videoUrl}' => '${item.filePath}' (${videoByteLength} bytes)"

    def result = downloadFile(videoUrl, item.filePath, videoByteLength)
    // TODO Come up with a more readable way which doesn't rely on arbitrary
    //      numeric indices to pass the results back from the function.
    if (result[0]) {
        println "Done: ${result[1]}"
        successCount++
    } else {
        println "ERROR: ${result[1]}"
    }
    println ''

    item
}
assert processedItems.size() == itemsWithVideo.size()

// Construct HTML document linking to downloaded files:
def indexHtmlFilePath = destDir + File.separatorChar + 'index.html'

// withStream closes the file even if writing one of the fragments throws.
new BufferedOutputStream(new FileOutputStream(indexHtmlFilePath)).withStream { indexHtmlFile ->
    // In an RSS document the feed title is a child of <channel>, not of the
    // root <rss> element, so it is read from channelNode.
    indexHtmlFile << """\
<html>
<head>
    <title>${channelNode.title[0].text().trim()}</title>
</head>
<body>

<p id="feedDescription">
${channelNode.description[0].text()}
</p>
"""

    // Each item renders itself through VideoPodcastItem.toString().
    processedItems.each{
        indexHtmlFile << """\
<p class="videoPodcastItem">
${it}
</p>
"""
    }

    indexHtmlFile << """\
</body>
</html>
"""
}
println "Wrote: ${indexHtmlFilePath}"

// Display summary
println "== ${successCount} out of ${itemsWithVideo.size()} successfully download. =="
Keys in threes...

Monday, March 16, 2009

Testing SyntaxHighlighter (videoPodcastGrabber.groovy)

No comments:

Post a Comment

Archive