Monday, March 16, 2009

Testing SyntaxHighlighter (videoPodcastGrabber.groovy)

I've set up my blog to use SyntaxHighlighter 2.0. Rather than just test it out with a "Hello World," here's a Groovy script I wrote a couple of years ago, after attending JavaOne 2007, to download a bunch of conference videos from a video podcast feed:


#!/usr/bin/env groovy

/**
 * Downloads a remote file to a local file.
 *
 * If the local file already exists with exactly the expected size, nothing is
 * downloaded. If it exists and is larger than expected, the file is left alone
 * and an error is reported. If it exists and is smaller, it is treated as an
 * incomplete download, deleted, and fetched again from scratch.
 *
 * @param url URL (as a string) representing remote file
 * @param localPath full path where file is to be stored locally
 * @param expectedSize the expected size of the file in bytes
 * @return two-item list:<ol>
 * <li>true if successful, false if error occurred
 * <li>message describing success or failure
 * </ol>
 */
def downloadFile(url, localPath, Long expectedSize) {
    def file = new File(localPath)

    if (file.exists()) {
        // Declared locally so the value does not leak into the script binding.
        def existingSize = file.length()

        // If file exists and it's exactly the expected size, then we're done with this file.
        if (existingSize == expectedSize) {
            return [true, 'File with expected name and size already exists.']
        }

        // If file already exists and it's bigger than expected, that's just weird. Give up.
        if (existingSize > expectedSize) {
            return [false, "File ${localPath} exists with ${existingSize} bytes, bigger than expected ${expectedSize}; skipping."]
        }

        // Otherwise it is smaller than expected: assume an incomplete download and remove it.
        // TODO Switch to use resumable download instead of starting over.
        file.delete()
    }

    // Open the remote stream BEFORE creating the local file, so a missing
    // remote resource does not leave an empty file behind.
    URL urlObj = new URL(url)
    def remote
    try {
        remote = urlObj.openStream()
    } catch (FileNotFoundException e) {
        return [false, "Could not download ${url}"]
    }

    // withStream / withOutputStream close both streams even when the copy throws.
    remote.withStream { input ->
        file.withOutputStream { output ->
            output << input
        }
    }

    // Both streams are closed at this point, so the on-disk length is final.
    def actualSize = file.length()
    if (actualSize == expectedSize) {
        return [true, 'Success.']
    }
    return [false, "Got ${actualSize} bytes but expected ${expectedSize}!"]
}

/**
 * One entry of the video podcast feed, together with the local path its
 * media file is stored under.
 */
class VideoPodcastItem {
    String title
    String externalLink
    String filePath
    String description
    Date pubDate
    String guid

    /**
     * Renders this item as a three-line HTML fragment: a linked title, the
     * description, and a link to the media file labelled with the text after
     * the last '.' in the file path.
     */
    String toString() {
        def extension = filePath.tokenize('.').getAt(-1)
        def titleLine = "Title: <a href=\"${externalLink}\">${title}</a><br>"
        def descriptionLine = "Description: ${description}<br>"
        def mediaLine = "Media: [<a href=\"${filePath}\">${extension}</a>]"
        return [titleLine, descriptionLine, mediaLine].join('\n')
    }
}

// ---- Settings ----
// Feed to read, and where/how the downloaded videos are stored.
def feedUrl = 'http://public-xml.feedroom.com/public_rss/sun_podcast_rss.xml?channel_id=179d1f683cee84e1c425b1aa164b652b7602b131'
def destDir = '.'
def fileExtension = '.mp4'
// Matches every run of non-word characters (used when building file names).
def nonSafeRegex = /[^\w]+/
// Predicate selecting feed items whose enclosure is an MP4 video.
def includedItems = { entry -> entry.enclosure.@type == 'video/mp4' }

// ---- Fetch the feed and parse it with XmlSlurper ----
def rssNode = new XmlSlurper().parse(feedUrl)

// ---- Walk the tree: exactly one channel, at least one item, at least one video ----
assert rssNode.channel.size() == 1
def channelNode = rssNode.channel[0]

assert channelNode.item.size() > 0
def items = channelNode.item
println "got ${items.size()} items"

def itemsWithVideo = items.grep(includedItems)
assert itemsWithVideo.size() > 0
println "got ${itemsWithVideo.size()} items with video"

// Download the attached video files, building one VideoPodcastItem per feed entry.
def successCount = 0
def processedItems = itemsWithVideo.collect { node ->
    // `def` keeps the item local to this iteration instead of writing a
    // shared variable into the script binding.
    def item = new VideoPodcastItem()
    item.title = node.title[0].text().trim()
    item.description = node.description[0].text().trim()
    item.externalLink = node.link[0].text().trim()
    def videoUrl = node.enclosure[0].@url.text().trim()
    item.guid = node.guid[0].text().trim()
    def lengthText = node.enclosure[0].@length.text().trim()
    item.filePath = destDir + File.separatorChar + item.title.replaceAll(nonSafeRegex, '_') + '-' + item.guid + fileExtension

    def ok = false
    def message
    if (lengthText.isLong()) {
        Long videoByteLength = lengthText.toLong()
        println "'${item.title}' => '${videoUrl}' => '${item.filePath}' (${videoByteLength} bytes)"
        // Destructure the [isOk, message] pair instead of indexing into it.
        (ok, message) = downloadFile(videoUrl, item.filePath, videoByteLength)
    } else {
        // A missing or malformed length should fail this one item, not abort
        // the whole batch with a NumberFormatException.
        println "'${item.title}' => '${videoUrl}' => '${item.filePath}' (unknown size)"
        message = "Enclosure length '${lengthText}' is not a valid byte count; skipping download."
    }

    if (ok) {
        println "Done: ${message}"
        successCount++
    } else {
        println "ERROR: ${message}"
    }
    println ''

    // The item is collected whether or not its download succeeded.
    item
}
assert processedItems.size() == itemsWithVideo.size()

// Construct HTML document linking to downloaded files.
def indexHtmlFilePath = destDir + File.separatorChar + 'index.html'

// withWriter closes the file even if rendering one of the items throws.
new File(indexHtmlFilePath).withWriter { indexHtmlFile ->
    // The feed title lives under <channel>, not directly under the root node
    // (the script asserts above that the root's child is `channel`).
    indexHtmlFile << """\
<html>
<head>
<title>${channelNode.title[0].text().trim()}</title>
</head>
<body>

<p id="feedDescription">
${channelNode.description[0].text()}
</p>
"""

    // One paragraph per processed item, rendered via the item's toString().
    processedItems.each { item ->
        indexHtmlFile << """\
<p class="videoPodcastItem">
${item}
</p>
"""
    }

    indexHtmlFile << """\
</body>
</html>
"""
}
println "Wrote: ${indexHtmlFilePath}"

// Display summary
println "== ${successCount} out of ${itemsWithVideo.size()} successfully download. =="

No comments:

Post a Comment