113 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Lua
		
	
	
	
	
	
			
		
		
	
	
			113 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Lua
		
	
	
	
	
	
| -----------------------------------------------------------------------------
 | |
| -- Little program that checks links in HTML files, using coroutines and
 | |
| -- non-blocking I/O via the dispatcher module.
 | |
| -- LuaSocket sample files
 | |
| -- Author: Diego Nehab
 | |
| -- RCS ID: $$
 | |
| -----------------------------------------------------------------------------
 | |
| local url = require("socket.url")
 | |
| local dispatch = require("dispatch")
 | |
| local http = require("socket.http")
 | |
-- dispatcher timeout setting (presumably seconds -- confirm in the dispatch
-- module)
dispatch.TIMEOUT = 10

-- make sure the user knows how to invoke us
arg = arg or {}
-- the length operator replaces table.getn, which newer Lua versions removed
if #arg < 1 then
    print("Usage:\n  luasocket check-links.lua [-n] {<url>}")
    -- standard Lua has no global exit(); leave through the os library and
    -- report the usage error with a non-zero status
    os.exit(1)
end
 | |
| 
 | |
-- Pick the dispatcher flavour: a leading '-n' argument selects the real,
-- coroutine-based (non-blocking) interface; otherwise the fake, sequential
-- (blocking) interface is used.
local flavour = "sequential"
if arg[1] == "-n" then
    -- drop the flag so that only addresses are left in arg
    table.remove(arg, 1)
    flavour = "coroutine"
end
handler = dispatch.newhandler(flavour)

-- number of link checks that were started and have not finished yet
local nthreads = 0
 | |
| 
 | |
-- Check a single link through the dispatcher and print the outcome.
-- Only links whose scheme is "http" are checked; any other link is skipped
-- without output. For a checked link a HEAD request is issued and one line
-- is written: the link alone when the status is 200, otherwise the link
-- followed by the status (or error) value.
function getstatus(link)
    local pieces = url.parse(link, {scheme = "file"})
    if pieces.scheme ~= "http" then return end
    -- count the check as pending before handing it to the dispatcher
    nthreads = nthreads + 1
    local function probe()
        local ok, code = http.request{
            method = "HEAD",
            url = link,
            create = handler.tcp
        }
        local report = '\t' .. link
        if not (ok and code == 200) then
            report = report .. ': ' .. tostring(code)
        end
        io.write(report, '\n')
        -- this check is done
        nthreads = nthreads - 1
    end
    handler:start(probe)
end
 | |
| 
 | |
-- Read a whole local file.
-- path: URL-escaped file path; it is unescaped before being opened.
-- Returns the file contents, or nil plus the message from io.open when the
-- file cannot be opened.
function readfile(path)
    local handle, reason = io.open(url.unescape(path), "r")
    if not handle then return nil, reason end
    local contents = handle:read("*a")
    handle:close()
    return contents
end
 | |
| 
 | |
-- Fetch the document at u, which may be an http URL or a local file
-- (a URL without a scheme is treated as "file").
-- Returns three values: the base URL to resolve links against, the document
-- body (nil on failure) and an error value (nil on success).
-- NOTE: this global definition replaces Lua's standard load function for
-- the remainder of the script.
function load(u)
    local parsed = url.parse(u, { scheme = "file" })
    local scheme = parsed.scheme

    if scheme == "http" then
        local body, code, headers = http.request(u)
        local base = u
        if code == 200 then
            -- if there was a redirect, update base to reflect it
            base = headers.location or base
        end
        local failure
        if not body then
            -- without a body the second result is reported as the error
            failure = code
        end
        return base, body, failure
    end

    if scheme == "file" then
        local body, failure = readfile(parsed.path)
        return u, body, failure
    end

    return u, nil, string.format("unhandled scheme '%s'", parsed.scheme)
end
 | |
| 
 | |
-- Extract every href target from an HTML document.
-- body: HTML text to scan.
-- base: URL against which each href is resolved via url.absolute.
-- Returns an array of absolute links: double-quoted hrefs first, then
-- single-quoted ones, then unquoted ones, each group in document order.
function getlinks(body, base)
    local links = {}

    -- Records one href. Returning "" makes gsub erase the matched attribute;
    -- returning nothing would keep the match in the text (Lua 5.1 and later),
    -- so the looser patterns below would pick the same link up again.
    local function collect(href)
        table.insert(links, url.absolute(base, href))
        return ""
    end

    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    -- extract links, strictest quoting style first
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    -- an unquoted value ends at whitespace, a quote or '>', so attributes
    -- that follow it are not swallowed into the link
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*([^%s>\"']+)", collect)
    return links
end
 | |
| 
 | |
-- Load the document at address and start a status check for every link
-- found in it. When the document cannot be loaded, the error value is
-- printed and nothing is checked.
function checklinks(address)
    local base, body, failure = load(address)
    if body then
        print("Checking ", base)
        for _, link in ipairs(getlinks(body, base)) do
            getstatus(link)
        end
    else
        print(failure)
    end
end
 | |
| 
 | |
-- check every address given on the command line; each one is first
-- resolved against the "file:" base URL
for _, target in ipairs(arg) do
    local address = url.absolute("file:", target)
    checklinks(address)
end

-- keep stepping the dispatcher for as long as started checks are pending
while true do
    if nthreads <= 0 then break end
    handler:step()
end
 |