pw
  ruby
/pw/ruby/HTML.rb
# coding: utf-8
# Render a Ruby data-structure as an HTML string.
#
# String    -> passed through verbatim (assumed to be markup)
# Hash      -> one element: :_ is the tag name (default div), :c the
#              child node(s), every other key an attribute
# Array     -> concatenation of the rendered members
# R         -> hyperlink to the resource
# nil/false -> empty string
# other     -> HTML-escaped #to_s
def H x # HTML
  case x
  when String
    x
  when Hash # element
    tag = (x[:_] || 'div').to_s
    selfClosing = [:img, :input, :link, :meta].member? x[:_]
    # attribute values are single-quoted; escape quote and angle brackets
    attrs = (x.keys - [:_, :c]).map{|k|
      " #{k}='" + x[k].to_s.gsub(/['<>]/, "'" => '%27', '<' => '%3C', '>' => '%3E') + "'"}.join
    opener = '<' + tag + attrs + (selfClosing ? '/' : '') + '>'
    closer = selfClosing ? '' : "</#{tag}>"
    opener + (H x[:c]) + closer
  when Array # sequential structure
    x.map{|node| H node}.join
  when R # resource reference -> <a>
    H({_: :a, href: x.uri, c: x.label})
  when NilClass
    ''
  when FalseClass
    ''
  else
    CGI.escapeHTML x.to_s
  end
end

class R
  # predicates rendered inline inside a table row, never as their own column
  InlineMeta = [Title, Image, Abstract, Content, Label, DC+'hasFormat', DC+'link', SIOC+'attachment', SIOC+'user_agent', Stat+'contains']
  # low-value predicates hidden from the table unless ?full is requested
  VerboseMeta = [DC+'identifier', DC+'source', DCe+'rights', DCe+'publisher', RSS+'comments', RSS+'em', RSS+'category', Atom+'edit', Atom+'self', Atom+'replies', Atom+'alternate', SIOC+'has_discussion', SIOC+'reply_of', SIOC+'num_replies', Mtime, Podcast+'explicit', Podcast+'summary', "http://wellformedweb.org/CommentAPI/commentRss","http://rssnamespace.org/feedburner/ext/1.0#origLink","http://purl.org/syndication/thread/1.0#total","http://search.yahoo.com/mrss/content",Harvard+'featured']

  # render a graph as a complete HTML document for request re
  HTML = -> graph, re {
    e = re.env
    # page title: Title of the request-path's #this resource, if any
    e[:title] = graph[re.path+'#this'].do{|r|r[Title].justArray[0]}
    e[:label] = {} # registry of label names, colorized in a <style> block below
    if q = re.q['q'] # full-text query: filter + highlight graph in place
      Grep[graph,q]
    end
    # empty graph -> 404 footer on a random background color
    foot = [{_: :style, c: "body {text-align:center;background-color:##{'%06x' % (rand 16777216)}}"}, {_: :span,style: 'font-size:12em;font-weight:bold',c: 404}, (CGI.escapeHTML e['HTTP_USER_AGENT'])] if graph.empty?
    H ["<!DOCTYPE html>\n",
       {_: :html,
        c: [{_: :head,
             c: [{_: :meta, charset: 'utf-8'}, {_: :title, c: e[:title]||re.path}, {_: :link, rel: :icon, href: '/.conf/icon.png'},
                 %w{code icons site}.map{|s|{_: :style, c: ".conf/#{s}.css".R.readFile}},
                 e[:Links].do{|links|
                   links.map{|type,uri|
                     {_: :link, rel: type, href: CGI.escapeHTML(uri.to_s)}}},
                 {_: :script, c: '.conf/site.js'.R.readFile}]},
            {_: :body,
             c: [Search[graph,re], (Tree[graph,re] unless re.basename=='msg'),
                 (Table[graph,re] unless graph.empty?),
                 {_: :style, c: e[:label].map{|name,_|
                    "[name=\"#{name}\"] {color:#000;background-color: #{'#%06x' % (rand 16777216)}}\n"}},
                 e[:Links][:down].do{|d|{_: :a, id: :down, c: '&#9660;', href: (CGI.escapeHTML d.to_s)}},
                 foot]}]}]}

  # render breadcrumb path segments, prev/next navigation and a find/grep form
  Search = -> graph,re {
    parts = re.path.split '/'
    path = ""
    grep = parts.size > 3 # suggest FIND closer to root, GREP for smaller subtrees
    # server offers @f -> find, @q -> grep explicit search-provider selection
    {class: :search,
     c: [re.env[:Links][:prev].do{|p|{_: :a, id: :prev, c: '&#9664;', href: (CGI.escapeHTML p.to_s)}},
         parts.map{|part| # one breadcrumb link per path segment
           path = path + part + '/'
           {_: :a, id: 'p'+path.sha2, class: :pathPart, href: path + '?head', c: [CGI.escapeHTML(URI.unescape part),{_: :span, class: :sep, c: '/'}]}},
         (query = re.q['q'] || re.q['f'] # current query, if any
          {_: :form,
           c: [{_: :a, class: :find, href: (query ? '?' : '') + '#searchbox' },
               {_: :input, id: :searchbox,
                name: grep ? 'q' : 'f',
                placeholder: grep ? :grep : :find
               }.update(query ? {value: query} : {})]} unless re.path=='/'),
         re.env[:Links][:next].do{|n|{_: :a, id: :next, c: '&#9654;', href: (CGI.escapeHTML n.to_s)}}]}}

  # render local containers as nested tables, link heights scaled by size
  Tree = -> graph,re {
    tree = {}
    flat = {}
    hide = ['msg','/'] # node names never shown
    # grow tree
    graph.keys.select{|k|!k.R.host && k[-1]=='/'}.map{|uri| # resources
      c = tree
      uri.R.parts.map{|name| # walk path
        c = c[name] ||= {}}} # update cursor to new position, creating node if necessary

    # (optional) only leaf-nodes
    flatten = -> t,path='' {
      t.keys.map{|k|
        cur = path+k+'/'
        if t[k].size > 0 # branching
          flatten[t[k], cur]
        else # leaf
          graph[cur].do{|c|
            graph[k+'/'] ||= {Size => 0}
            graph[k+'/'][Size] += c[Size].justArray[0]||0} # magnitude to bin
          flat[k] ||= {}
        end}}
    if re.q.has_key? 'flat'
      flatten[tree]
      tree = flat
    end

    # find max-size for scaling
    size = graph.values.map{|r|!hide.member?(r.R.basename) && r.has_key?('uri') && r.uri[-1]=='/' && r[Size].justArray[0] || 1}.max.to_f

    # link to container preview/summary
    qs = R.qs re.q.merge({'head'=>''})

    # recursive renderer: one <table> per depth with a name row + children row
    render = -> t,depth=0,path='' {
      label = 'p'+path.sha2
      re.env[:label][label] = true # register label name for colorized CSS
      nodes = t.keys.-(hide).sort
      {_: :table, class: :tree, c: [
         {_: :tr, class: :name, c: nodes.map{|name| # node
            this = path + name + '/'
            s = nodes.size > 1 && graph[this].do{|r|r[Size].justArray[0]}
            height = (s && size) ? (8.8 * s / size) : 1.0 # scale link-height, 8.8em max
            {_: :td,
             c: {_: :a, href: this + qs, name: label, id: 't'+this.sha2,
                 style: s ? "height:#{height < 1.0 ? 1.0 : height}em" : "background-color:##{('%x' % rand(6))*3};color:#fff",
                 c: ['&nbsp;'*depth, CGI.escapeHTML(URI.unescape name)]}}}.intersperse("\n")},"\n",
         {_: :tr, c: nodes.map{|k| # child nodes
            graph[path+k+'/'].do{|r| graph.delete r.uri} # "consume" container so it doesnt appear again in tabular-list
            {_: :td,
             c: (render[t[k], depth+1, path+k+'/'] if t[k].size > 0)}}.intersperse("\n")}]}}

    render[tree]}

  # render graph as a sortable table: a row per resource, a column per
  # predicate (minus inline/verbose metadata), header row with sort links
  Table = -> g, e {
    (1..10).map{|i|e.env[:label]["quote"+i.to_s] = true} # labels
    [:links,:images].map{|p| e.env[p] = []} # link & image lists
    p = e.q['sort'] || Date # sort predicate
    direction = e.q.has_key?('ascending') ? :id : :reverse
    datatype = [R::Size,R::Stat+'mtime'].member?(p) ? :to_i : :to_s # numeric vs lexical sort-key
    keys = [Creator,To,Type,g.values.select{|v|v.respond_to? :keys}.map(&:keys)].flatten.uniq
    keys -= InlineMeta; keys -= VerboseMeta unless e.q.has_key? 'full'
    [{_: :table,
      c: [{_: :tbody,
           c: g.values.sort_by{|s|((p=='uri' ? (s[Title]||s[Label]||s.uri) : s[p]).justArray[0]||0).send datatype}.send(direction).map{|r|
             TableRow[r,e,p,direction,keys]}.intersperse("\n")},
          {_: :tr, c: keys.map{|k| # header row
             q = e.q.merge({'sort' => k})
             if direction == :id # direction toggle
               q.delete 'ascending'
             else
               q['ascending'] = ''
             end
             href = CGI.escapeHTML R.qs q
             {_: :th, property: k, class: k==p ? 'selected' : '',
              c: {_: :a,href: href,class: Icons[k]||'',c: Icons[k] ? '' : (k.R.fragment||k.R.basename)}}}}]},
     {_: :style, c: "[property=\"#{p}\"] {border-color:#444;border-style: solid; border-width: 0 0 .08em 0}"}]}

  # render one resource l as a table row for request e; also mutates
  # e.env (:label, :links, :images) so repeated links/images are deduped
  TableRow = -> l,e,sort,direction,keys { this = l.R
    types = l.types
    chat = types.member? SIOC+'InstantMessage'
    mail = types.member? SIOC+'MailMessage'
    post = types.member? SIOC+'BlogPost'
    tweet = types.member? SIOC+'Tweet'
    href = this.uri
    head = e.q.has_key? 'head' # summary mode: suppress full content
    rowID = (e.path == this.path && this.fragment) ? this.fragment : 'r'+href.sha2
    monospace = chat || mail || types.member?(SIOC+'SourceCode')
    date = l[Date].justArray.sort[-1] # latest date value
    datePath = '/' + date[0..13].gsub(/[-T:]/,'/') if date
    titles = l[Title].justArray # explicit title
    if titles.empty? && this.path
      if chat || tweet # untitled
      else # doc title || URI
        titles.push(e.path==this.path && e.env[:title] || URI.unescape(this.uri))
      end
    end
    labels = l[Label].justArray
    this.host.do{|h|labels.unshift h}
    # type-aware creator/recipient link (mail/tweet/post point into archives)
    indexContext = -> v { v = v.R
      if mail
        {_: :a, id: 'address_'+rand.to_s.sha2, href: v.path + '?head#r' + href.sha2, c: v.label}
      elsif tweet
        {_: :a, href: datePath + '*twitter*#r' + href.sha2, c: v.label}
      elsif post
        {_: :a, href: datePath[0..-4] + '*/*' + (v.host||'') + '*?head#r' + href.sha2, c: v.label}
      else
        v
      end}
    unless head && titles.empty? && !l[Abstract] # skip empty rows in summary mode
      link = href + (!this.host && href[-1]=='/' && '?head' || '')
      {_: :tr, id: rowID, href: link,
       c: keys.map{|k| # one cell per column/predicate
         {_: :td, property: k,
          c: case k
             when 'uri'
               [titles.map{|t|[{_: :a, class: :title, href: link, c: (CGI.escapeHTML t.to_s)},' ']},
                labels.map{|v|
                  label = (v.respond_to?(:uri) ? (v.R.fragment || v.R.basename) : v).to_s
                  lbl = label.downcase.gsub(/[^a-zA-Z0-9_]/,'')
                  e.env[:label][lbl] = true
                  [{_: :a, class: :label, href: link, name: lbl, c: (CGI.escapeHTML label[0..41])},' ']},
                (links = [DC+'link', SIOC+'attachment', Stat+'contains'].map{|p|l[p]}.flatten.compact.map(&:R).select{|l|!e.env[:links].member? l} # unseen links
                 links.map{|l|e.env[:links].push l} # mark seen
                 {_: :table, class: :links,
                  c: links.group_by(&:host).map{|host,links|
                    tld = host.split('.')[-1] || '' if host
                    e.env[:label][tld] = true
                    {_: :tr,
                     c: [({_: :td, class: :host, name: tld,
                          c: {_: :a, href: '//'+host, c: host}} if host),
                         {_: :td, class: :path, colspan: host ? 1 : 2,
                          c: links.map{|link|
                            [{_: :a, id: 'link_'+rand.to_s.sha2, href: link.uri, c: CGI.escapeHTML(URI.unescape((link.host ? link.path : link.basename)||'')[0..64])},' ']}}]}}} unless links.empty?),
                l[Abstract],
                (l[Content].justArray.map{|c|monospace ? {_: :pre,c: c} : [c,' ']} unless head),
                (images = []
                 images.push this if types.member?(Image) # subject of triple
                 l[Image].do{|i|images.concat i}          # object of triple
                 images.map(&:R).select{|i|!e.env[:images].member? i}.map{|img| # unseen images
                   e.env[:images].push img # seen
                   {_: :a, class: :thumb, href: href,
                    c: {_: :img, src: if !img.host || e.host==img.host
                         img.path + '?preview'
                       else
                         img.uri
                        end}}})]
             when Type
               l[Type].justArray.uniq.select{|t|t.respond_to? :uri}.map{|t|
                 {_: :a, href: href, c: Icons[t.uri] ? '' : (t.R.fragment||t.R.basename), class: Icons[t.uri]}}
             when Size
               l[Size].do{|sz|
                 sum = 0
                 sz.justArray.map{|v|
                   sum += v.to_i}
                 sum}
             when Creator
               [l[k].justArray.map{|v|
                 if v.respond_to? :uri
                   indexContext[v]
                 else
                   CGI.escapeHTML v.to_s
                 end}.intersperse(' '),
                (l[SIOC+'user_agent'].do{|ua|
                   ['<br>', {_: :span, class: :notes, c: ua.join}]} unless head)]
             when SIOC+'addressed_to'
               l[k].justArray.map{|v|
                 if v.respond_to? :uri
                   indexContext[v]
                 else
                   CGI.escapeHTML v.to_s
                 end}.intersperse(' ')
             when Date
               {_: :a, class: :date, href: (datePath||'') + '#r' + href.sha2, c: date}
             when DC+'cache'
               l[k].justArray.map{|c|[{_: :a, href: c.path, class: :chain}, ' ']}
             else
               l[k].justArray.map{|v|v.respond_to?(:uri) ? v.R : CGI.escapeHTML(v.to_s)}.intersperse(' ')
             end}}.intersperse("\n")}
    end
  }

  # Full-text grep across the graph: drop resources that don't match the
  # query (containers are kept), then wrap each matched word in a
  # colorized <span>, putting the first matching lines into the Abstract.
  Grep = -> graph, q {
    wordIndex = {} # search word -> color index
    words = R.tokens q
    words.each_with_index{|word,i| wordIndex[word] = i }
    pattern = /(#{words.join '|'})/i
    # select resources — iterate over a key snapshot so we never delete
    # from the Hash while iterating it (the old graph.map{..delete..}
    # relied on MRI tolerating mid-iteration deletes)
    graph.keys.each{|u|
      r = graph[u]
      keep = r.to_s.match(pattern) || r[Type] == Container
      graph.delete u unless keep}
    # highlight matches
    graph.values.map{|r|
      r[Content].justArray.map(&:lines).flatten.grep(pattern).do{|lines|
        r[Abstract] = [lines[0..5].map{|l|
          l.gsub(/<[^>]+>/,'')[0..512].gsub(pattern){|g| # capture match
            H({_: :span, class: "w#{wordIndex[g.downcase]}", c: g}) # wrap match
          }},{_: :hr}] if lines.size > 0 }}
    # CSS: one random background color per search word
    graph['#abstracts'] = {Abstract => {_: :style, c: wordIndex.values.map{|i|".w#{i} {background-color: #{'#%06x' % (rand 16777216)}; color: white}\n"}}}
    graph}

  # sanitize an HTML fragment: remove unwanted tags entirely and strip
  # every attribute not on the keep-list, then serialize back to XHTML
  StripHTML = -> body, loseTags=%w{iframe script style}, keepAttr=%w{alt href rel src title type} {
    doc = Nokogiri::HTML.fragment body
    if loseTags
      loseTags.each{|tag| doc.css(tag).remove}
    end
    if keepAttr
      doc.traverse{|node|
        node.attribute_nodes.each{|attr|
          attr.unlink unless keepAttr.member? attr.name}}
    end
    doc.to_xhtml(:indent => 0)}

end
2017-11-23T03:54:35+00:00 13292
/pw/ruby/R
2017-11-23T02:45:49+00:00 453
/pw/ruby/MIME.rb
# coding: utf-8
class R

  # URIs: vocabulary namespace prefixes and commonly-used term constants
  W3 = 'http://www.w3.org/'
  OA = 'https://www.w3.org/ns/oa#'
  Purl = 'http://purl.org/'
  DC   = Purl + 'dc/terms/'
  DCe  = Purl + 'dc/elements/1.1/'
  SIOC = 'http://rdfs.org/sioc/ns#'
  Schema = 'http://schema.org/'
  Podcast = 'http://www.itunes.com/dtds/podcast-1.0.dtd#'
  Harvard  = 'http://harvard.edu/'
  Sound    = Purl + 'ontology/mo/Sound'
  Image    = DC + 'Image'
  RSS      = Purl + 'rss/1.0/'
  # frequently-used predicate/type terms
  Date     = DC   + 'date'
  Title    = DC   + 'title'
  Abstract = DC   + 'abstract'
  Post     = SIOC + 'Post'
  To       = SIOC + 'addressed_to'
  From     = SIOC + 'has_creator'
  Creator  = SIOC + 'has_creator'
  Content  = SIOC + 'content'
  Stat     = W3   + 'ns/posix/stat#'
  Atom     = W3   + '2005/Atom#'
  Type     = W3 + '1999/02/22-rdf-syntax-ns#type'
  Label    = W3 + '2000/01/rdf-schema#label'
  Size     = Stat + 'size'
  Mtime    = Stat + 'mtime'
  Container = W3  + 'ns/ldp#Container'

  # prefix -> MIME: filename prefix (basename up to first '.', lowercased)
  # to MIME type, consulted before suffix mapping in #mime
  MIMEprefix = {
    'authors' => 'text/plain',
    'changelog' => 'text/plain',
    'contributors' => 'text/plain',
    'copying' => 'text/plain',
    'install' => 'text/x-shellscript',
    'license' => 'text/plain',
    'readme' => 'text/markdown',
    'todo' => 'text/plain',
    'unlicense' => 'text/plain',
    'msg' => 'message/rfc822',
  }

  # suffix -> MIME: file extension (lowercased, no dot) to MIME type;
  # overrides Rack's suffix table, which is tried afterwards in #mime
  MIMEsuffix = {
    'asc' => 'text/plain',
    'chk' => 'text/plain',
    'conf' => 'application/config',
    'desktop' => 'application/config',
    'doc' => 'application/msword',
    'docx' => 'application/msword+xml',
    'dat' => 'application/octet-stream',
    'db' => 'application/octet-stream',
    'e' => 'application/json',
    'eot' => 'application/font',
    'go' => 'application/go',
    'haml' => 'text/plain',
    'hs' => 'application/haskell',
    'ini' => 'text/plain',
    'ino' => 'application/ino',
    'md' => 'text/markdown',
    'msg' => 'message/rfc822',
    'list' => 'text/plain',
    'log' => 'text/chatlog',
    'ru' => 'text/plain',
    'rb' => 'application/ruby',
    'rst' => 'text/restructured',
    'sample' => 'application/config',
    'sh' => 'text/x-shellscript',
    'terminfo' => 'application/config',
    'tmp' => 'application/octet-stream',
    'ttl' => 'text/turtle',
    'u' => 'text/uri-list',
    'woff' => 'application/font',
    'yaml' => 'text/plain',
  }

  # MIME -> Triplr: maps a MIME type to [method_symbol, *extra_args];
  # the array is splatted into #send by #transcode to emit triples
  Triplr = {
    'application/config'   => [:triplrDataFile],
    'application/font'      => [:triplrFile],
    'application/go'   => [:triplrSourceCode],
    'application/haskell'   => [:triplrSourceCode],
    'application/javascript' => [:triplrSourceCode],
    'application/ino'      => [:triplrSourceCode],
    'application/json'      => [:triplrDataFile],
    'application/octet-stream' => [:triplrFile],
    'application/org'      => [:triplrOrg],
    'application/pdf'      => [:triplrFile],
    'application/makefile'      => [:triplrSourceCode],
    'application/msword'   => [:triplrWordDoc],
    'application/msword+xml' => [:triplrWordXML],
    'application/pkcs7-signature' => [:triplrFile],
    'application/rtf'      => [:triplrRTF],
    'application/ruby'     => [:triplrSourceCode],
    'application/sh'      => [:triplrSourceCode],
    'application/x-sh'     => [:triplrSourceCode],
    'application/xml'     => [:triplrDataFile],
    'application/x-executable' => [:triplrFile],
    'application/x-gzip'   => [:triplrArchive],
    'application/vnd.oasis.opendocument.text' => [:triplrOpenDocument],
    'audio/mpeg'           => [:triplrAudio],
    'audio/x-wav'          => [:triplrAudio],
    'audio/3gpp'           => [:triplrAudio],
    'image/bmp'            => [:triplrImage],
    'image/gif'            => [:triplrImage],
    'image/png'            => [:triplrImage],
    'image/svg+xml'        => [:triplrImage],
    'image/tiff'           => [:triplrImage],
    'image/jpeg'           => [:triplrImage],
    'inode/directory'      => [:triplrContainer],
    'message/rfc822'       => [:triplrMail],
    'text/cache-manifest'  => [:triplrText],
    'text/chatlog'         => [:triplrChatLog],
    'text/css'             => [:triplrSourceCode],
    'text/csv'             => [:triplrCSV,/,/],
    'text/html'            => [:triplrHTML],
    'text/man'             => [:triplrMan],
    'text/x-c'             => [:triplrSourceCode],
    'text/x-ruby'          => [:triplrSourceCode],
    'text/x-php'           => [:triplrSourceCode],
    'text/x-python'        => [:triplrSourceCode],
    'text/x-script.ruby'   => [:triplrSourceCode],
    'text/x-script.python' => [:triplrSourceCode],
    'text/x-shellscript'   => [:triplrFile],
    'text/markdown'        => [:triplrMarkdown],
    'text/nfo'             => [:triplrText,'cp437'],
    'text/plain'           => [:triplrText],
    'text/restructured'    => [:triplrMarkdown],
    'text/rtf'             => [:triplrRTF],
    'text/semicolon-separated-values' => [:triplrCSV,/;/],
    'text/tab-separated-values' => [:triplrCSV,/\t/],
    'text/uri-list'        => [:triplrUriList],
    'text/x-tex'           => [:triplrTeX],
  }

  # RDF type -> icon name (icon name to font mapping in icons.css);
  # used by Table/TableRow: a mapped predicate/type renders as an icon
  # class instead of its textual name
  Icons = {
    'uri' => :id,
    Type => :type,
    Container => :dir,
    Content => :pencil,
    Date => :date,
    Label => :tag,
    Title => :title,
    Sound => :speaker,
    Image => :img,
    Size => :size,
    Mtime => :time,
    To => :userB,
    DC+'hasFormat' => :file,
    DC+'cache' => :chain,
    Schema+'Person' => :user,
    Schema+'location' => :location,
    Stat+'File' => :file,
    Stat+'Archive' => :archive,
    Stat+'HTMLFile' => :html,
    Stat+'WordDocument' => :word,
    Stat+'DataFile' => :tree,
    Stat+'TextFile' => :textfile,
    Stat+'width' => :width,
    Stat+'height' => :height,
    Stat+'container' => :dir,
    Stat+'contains' => :dir,
    SIOC+'BlogPost' => :pencil,
    SIOC+'ChatLog' => :comments,
    SIOC+'Discussion' => :comments,
    SIOC+'InstantMessage' => :comment,
    SIOC+'MicroblogPost' => :newspaper,
    SIOC+'WikiArticle' => :pencil,
    SIOC+'Usergroup' => :group,
    SIOC+'SourceCode' => :code,
    SIOC+'Tweet' => :bird,
    SIOC+'has_creator' => :user,
    SIOC+'user_agent' => :mailer,
    SIOC+'has_discussion' => :comments,
    SIOC+'Thread' => :openenvelope,
    SIOC+'Post' => :newspaper,
    SIOC+'MailMessage' => :envelope,
    W3+'2000/01/rdf-schema#Resource' => :node,
  }

  # file -> MIME
  # Resolve this resource's MIME type: directory check, then filename
  # prefix table, then suffix table, then Rack's suffix table, finally
  # (slow) content sniffing via file(1). Memoized in @mime.
  def mime
    @mime ||= # memoize
      (name = path || ''
       prefix = ((File.basename name).split('.')[0]||'').downcase
       suffix = ((File.extname name)[1..-1]||'').downcase
       if node.directory? # container
         'inode/directory'
       elsif MIMEprefix[prefix] # prefix mapping
         MIMEprefix[prefix]
       elsif MIMEsuffix[suffix] # suffix mapping
         MIMEsuffix[suffix]
       elsif Rack::Mime::MIME_TYPES['.'+suffix] # suffix mapping (Rack fallback)
         Rack::Mime::MIME_TYPES['.'+suffix]
       else
         puts "#{pathPOSIX} unmapped MIME, sniffing content (SLOW)"
         `file --mime-type -b #{Shellwords.escape pathPOSIX.to_s}`.chomp
       end)
  end

  # tokenize a query string into unique, lowercased word tokens
  def R.tokens str; str ? str.scan(/[\w]+/).map(&:downcase).uniq : [] end
  # MIME types that may be written directly
  Writable = %w{application/atom+xml text/html}
  def isRDF; %w{atom n3 rdf owl ttl}.member? ext end # RDF-format file extension?
  def toRDF; isRDF ? self : transcode end       # R -> R
  def to_json *a; {'uri' => uri}.to_json *a end # R -> Hash

  # Load a set of files (RDF and non-RDF) into a URI-indexed Hash tree:
  # {subject-URI => {'uri' => .., predicate => [objects]}}. Non-RDF files
  # are transcoded to cached JSON first. With ?du or ?f/?q query-args,
  # containers additionally gain storage-size / match-count attributes.
  def load set # load Non-RDF + RDF to URI-indexed tree
    graph = RDF::Graph.new # graph
    g = {}                 # tree
    rdf,nonRDF = set.partition &:isRDF #partition on file type
    # load RDF
    rdf.map{|n|graph.load n.pathPOSIX, :base_uri => n}
    graph.each_triple{|s,p,o| # each triple
      s = s.to_s; p = p.to_s # subject, predicate
      o = [RDF::Node, RDF::URI, R].member?(o.class) ? o.R : o.value # object
      g[s] ||= {'uri'=>s} # new resource
      g[s][p] ||= []
      g[s][p].push o unless g[s][p].member? o} # RDF to tree
    # load nonRDF
    nonRDF.map{|n|
      n.transcode.do{|transcode| # transcode to RDF
        JSON.parse(transcode.readFile).map{|s,re| # subject
          re.map{|p,o| # predicate, objects
            o.justArray.map{|o| # object
              o = o.R if o.class==Hash
              g[s] ||= {'uri'=>s} # new resource
              g[s][p] ||= []; g[s][p].push o unless g[s][p].member? o} unless p == 'uri' }}}} # RDF to tree
    if q.has_key?('du') && path != '/' # DU: container storage-size attribute
      set.select{|d|d.node.directory?}.-([self]).map{|node|
        g[node.path+'/']||={}
        g[node.path+'/'][Size] = node.du}
    elsif (q.has_key?('f')||q.has_key?('q')) && path!='/' # FIND/GREP: container match-count attribute
      set.map{|r|
        bin = r.dirname + '/'
        g[bin] ||= {'uri' => bin, Type => Container}
        g[bin][Size] = 0 if !g[bin][Size] || g[bin][Size].class==Array
        g[bin][Size] += 1}
    end
    g
  end

  # load a set of documents into a single RDF::Graph, transcoding any
  # non-RDF inputs first; bases each load on the extensionless doc URI
  def loadRDF set
    graph = RDF::Graph.new
    set.each{|doc| graph.load doc.toRDF.pathPOSIX, :base_uri => doc.stripDoc}
    graph
  end

  # Transcode a non-RDF file to a URI-indexed JSON tree stored as a
  # cached '.e' document (keyed by inode hash, refreshed when the source
  # is newer). Returns the cache doc, or nil when transcoding fails.
  def transcode # non-RDF to RDF using triplrs
    return self if ext == 'e' # already a transcoded document
    hash = node.stat.ino.to_s.sha2
    doc = R['/.cache/'+hash[0..2]+'/'+hash[3..-1]+'.e'].setEnv @r
    unless doc.e && doc.m > m # cache missing or stale
      tree = {}
      triplr = Triplr[mime]
      unless triplr
        puts "WARNING missing #{mime} triplr for #{uri}"
        triplr = :triplrFile
      end
      send(*triplr){|s,p,o| # collect yielded triples into the tree
        tree[s] ||= {'uri' => s}
        tree[s][p] ||= []
        tree[s][p].push o}
      doc.writeFile tree.to_json
    end
    doc
  rescue => e # StandardError only — rescuing Exception swallowed Interrupt/SystemExit
    puts uri, e.class, e.message
  end

  # NOTE(review): Kernel#open on a URI relies on open-uri for remote fetch,
  # and a "|"-prefixed value would spawn a subprocess — confirm uri is trusted
  def nokogiri; Nokogiri::HTML.parse (open uri).read end
  # thin triplrs: emit an RDF type then delegate to triplrFile for stat data
  def triplrArchive &f; yield uri, Type, R[Stat+'Archive']; triplrFile &f end
  def triplrAudio &f;   yield uri, Type, R[Sound]; triplrFile &f end
  def triplrHTML &f;    yield uri, Type, R[Stat+'HTMLFile']; triplrFile &f end
  def triplrDataFile &f; yield uri, Type, R[Stat+'DataFile']; triplrFile &f end
  # source code: type, pygmentize-highlighted content, then stat data
  def triplrSourceCode &f; yield uri, Type, R[SIOC+'SourceCode']; yield uri, Content, `pygmentize -f html #{sh}`; triplrFile &f end
  def triplrTeX;        yield stripDoc.uri, Content, `cat #{sh} | tth -r` end
  # word-processor formats: delegate to triplrWord with the right converter
  def triplrRTF          &f; triplrWord :catdoc,        &f end
  def triplrWordDoc      &f; triplrWord :antiword,      &f end
  def triplrWordXML      &f; triplrWord :docx2txt, '-', &f end
  def triplrOpenDocument &f; triplrWord :odt2txt,       &f end
  def triplrUriList; uris.map{|u|yield u, Type, R[W3+'2000/01/rdf-schema#Resource']} end
  def uris; open(pathPOSIX).readlines.map &:chomp end

  # POSIX map: filesystem operations bridging URI space and POSIX paths
  # POSIX path -> URI (strips leading '.', percent-encodes space and '#')
  def R.fromPOSIX p; p.sub(/^\./,'').gsub(' ','%20').gsub('#','%23').R rescue '/'.R end
  def + u; R[uri + u.to_s].setEnv @r end
  def <=> c; to_s <=> c.to_s end
  def ==  u; to_s == u.to_s end
  def basename; File.basename (path||'') end
  # child entries, skipping dotfiles
  def children; node.children.delete_if{|f|f.basename.to_s.index('.')==0}.map{|c|c.R.setEnv @r} end
  def dir; dirname.R end
  def dirname; File.dirname path end
  def exist?; node.exist? end
  def ext; (File.extname uri)[1..-1] || '' end
  # recursive storage size via du(1)
  def du; `du -s #{sh}| cut -f 1`.chomp.to_i end
  # case-insensitive filename search via find(1), capped at 1024 results
  def find p; (p && !p.empty?) ? `find #{sh} -ipath #{('*'+p+'*').sh} | head -n 1024`.lines.map{|p|R.fromPOSIX p.chomp} : [] end
  def glob; (Pathname.glob pathPOSIX).map{|p|p.R.setEnv @r}.do{|g|g.empty? ? nil : g} end
  def label; fragment || (path && basename != '/' && (URI.unescape basename)) || host || '' end
  def ln x,y;   FileUtils.ln   x.node.expand_path, y.node.expand_path end
  def ln_s x,y; FileUtils.ln_s x.node.expand_path, y.node.expand_path end
  def match p; to_s.match p end
  def mkdir; FileUtils.mkdir_p pathPOSIX unless exist?; self end
  def mtime; node.stat.mtime end
  def node; @node ||= (Pathname.new pathPOSIX) end
  # NOTE(review): memoizes into @path — confirm no reader of @path expects
  # the URI path rather than this unescaped relative POSIX path
  def pathPOSIX; @path ||= (URI.unescape(path[0]=='/' ? '.' + path : path)) end
  def parts; path ? path.split('/') : [] end
  # read entire file contents; File.read closes the descriptor (the
  # previous File.open(...).read leaked the handle until GC)
  def readFile; File.read pathPOSIX end
  # shell-escaped POSIX path for backtick invocations
  def shellPath; pathPOSIX.utf8.sh end
  def size; node.size rescue 0 end
  # drop document-format extension from the URI
  def stripDoc; R[uri.sub /\.(e|html|json|log|md|msg|ttl|txt)$/,''].setEnv(@r) end
  # write o to this path, creating parent containers; returns self
  def writeFile o; dir.mkdir; File.open(pathPOSIX,'w'){|f|f << o}; self end

  alias_method :e, :exist?
  alias_method :m, :mtime
  alias_method :sh, :shellPath
  alias_method :uri, :to_s

  # base triplr: emit POSIX size and modification time for any file
  def triplrFile
    s = path
    size.do{|sz|yield s, Size, sz}
    mtime.do{|mt|
      yield s, Mtime, mt.to_i
      yield s, Date, mt.iso8601}
  end

  # directory triplr: emit Container typing, timestamps, contained
  # child containers and (wildcarded, extensionless) file resources
  def triplrContainer
    s = path
    s = s + '/' unless s[-1] == '/'
    mt = mtime
    yield s, Type, R[Container]
    yield s, Mtime, mt.to_i
    yield s, Date, mt.iso8601
    containers,files = children.partition{|e|e.node.directory?}
    # collapse versioned/numbered name segments into a '*' glob
    resources = files.map{|f|R[s+f.basename.gsub(/\.[0-9re\.]+\./,'.*.')].stripDoc}.sort.uniq
    containers.map{|d|yield s, Stat+'contains', d + '/'}
    resources.map{|f|yield s, Stat+'contains', f}
    yield s, Size, [*containers, *files].size
  end

  # image triplr: Image typing plus pixel dimensions, then stat data
  def triplrImage &f
    yield uri, Type, R[Image]
    w,h = Dimensions.dimensions pathPOSIX
    yield uri, Stat+'width', w
    yield uri, Stat+'height', h
    triplrFile &f
  end

  # word-processor triplr: convert document to text via the given shell
  # converter (catdoc/antiword/docx2txt/odt2txt) and emit as <pre> content
  def triplrWord conv, out='', &f
    triplrFile &f
    yield uri, Type, R[Stat+'WordDocument']
    yield uri, Content, '<pre>' +
                        `#{conv} #{sh} #{out}` +
                        '</pre>'
  end

  # plaintext triplr: emit TextFile typing, date, a hasFormat link back
  # to the source file, and <pre>-wrapped, hyperlinked content — with an
  # optional source encoding to transcode from (e.g. 'cp437' for .nfo)
  def triplrText enc=nil, &f
    doc = stripDoc.uri
    yield doc, Type, R[Stat+'TextFile']
    mtime.do{|mt|
      yield doc, Date, mt.iso8601}
    yield doc, DC+'hasFormat', self
    yield doc, Content,
    H({_: :pre, style: 'white-space: pre-wrap',
        c: readFile.do{|r| enc ? r.force_encoding(enc).to_utf8 : r}.hrefs})
  rescue => e # StandardError only — rescuing Exception swallowed Interrupt/SystemExit
    puts uri, e.class, e.message
  end

  # markdown triplr: render to HTML via Redcarpet with Pygments highlighting
  def triplrMarkdown
    doc = stripDoc.uri
    yield doc, Type, R[Stat+'TextFile']
    yield doc, Content, ::Redcarpet::Markdown.new(::Redcarpet::Render::Pygment, fenced_code_blocks: true).render(readFile)
    mtime.do{|mt|yield doc, Date, mt.iso8601}
  end

  # CSV triplr: emit Table typing, row count, and one Row resource per
  # data line with values keyed by the header fields.
  # d: field delimiter from the Triplr table (currently unused —
  # TODO confirm: CSV.read always parses comma-separated here)
  def triplrCSV d
    ns    = W3 + 'ns/csv#'
    lines = CSV.read pathPOSIX
    lines[0].do{|fields| # header-row
      yield uri, Type, R[ns+'Table']
      yield uri, ns+'rowCount', lines.size
      lines[1..-1].each_with_index{|row,line|
        id = uri + '#row:' + line.to_s # row URI, constant across fields
        yield id, Type, R[ns+'Row']    # once per row (was re-yielded per field)
        row.each_with_index{|field,i|
          yield id, fields[i], field}}}
  end

  # chat-log triplr: parse IRC-style "HHMMSS nick message" lines into
  # per-line InstantMessage resources, then summarize the whole file as
  # a ChatLog resource (channel, network, date, line count)
  def triplrChatLog &f
    linenum = -1
    base = stripDoc
    dir = base.dir
    log = base.uri
    basename = base.basename
    channel = dir + '/' + basename
    network = dir + '/' + basename.split('%23')[0] + '*'
    # ISO date derived from a /YYYY/MM/DD path component, if present
    day = dir.uri.match(/\/(\d{4}\/\d{2}\/\d{2})/).do{|d|d[1].gsub('/','-')}
    readFile.lines.map{|l|
      l.scan(/(\d\d)(\d\d)(\d\d)[\s+@]*([^\(\s]+)[\S]* (.*)/){|m|
        s = base + '#l' + (linenum += 1).to_s
        yield s, Type, R[SIOC+'InstantMessage']
        yield s, Label, m[3]
        yield s, Creator, R['#'+m[3]]
        yield s, To, channel
        yield s, Content, m[4].hrefs{|p, o|
          yield log, p, o
          yield s, p, o
        }
        yield s, Date, day+'T'+m[0]+':'+m[1]+':'+m[2] if day}}
    if linenum > 0 # summarize at log-URI
      yield log, Type, R[SIOC+'ChatLog']
      yield log, Date, mtime.iso8601
      yield log, Creator, channel
      yield log, To, network
      yield log, Title, basename.split('%23')[-1] # channel
      yield log, Size, linenum
    end
  rescue => e # StandardError only — rescuing Exception swallowed Interrupt/SystemExit
    puts uri, e.class, e.message
  end

  # Message-ID -> hashed message URI: /msg/<h0>/<h1>/<h2>/<slug>/#this
  MessageURI = -> id { h=id.sha2; ['', 'msg', h[0], h[1], h[2], id.gsub(/[^a-zA-Z0-9]+/,'.')[0..96], '#this'].join('/').R}
  def triplrMail &b
    m = Mail.read node; return unless m # open message-file
    id = m.message_id || m.resent_message_id || rand.to_s.sha2 # Message-ID
    resource = MessageURI[id]; e = resource.uri                # Message URI
    srcDir = resource.path.R; srcDir.mkdir # container
    srcFile = srcDir + 'this.msg'          # found location
    ln self, srcFile unless srcFile.e rescue nil # canonical location
    yield e, DC+'identifier', id         # pre-web identifier
    yield e, DC+'cache', self + '*' # source file
    yield e, Type, R[SIOC+'MailMessage'] # RDF type

    # From
    from = []
    m.from.do{|f|f.justArray.map{|f|from.push f.to_utf8.downcase if f}} # queue for indexing
    m[:from].do{|fr|
      fr.addrs.map{|a|yield e, Creator, a.display_name||a.name} if fr.respond_to? :addrs} # creator name
    m['X-Mailer'].do{|m|yield e, SIOC+'user_agent', m.to_s}

    # To
    to = []
    %w{to cc bcc resent_to}.map{|p|      # recipient fields
      m.send(p).justArray.map{|r|        # recipient
        to.push r.to_utf8.downcase }}    # queue for indexing
    m['X-BeenThere'].justArray.map{|r|to.push r.to_s} # anti-loop recipient
    m['List-Id'].do{|name|yield e, To, name.decoded.sub(/<[^>]+>/,'').gsub(/[<>&]/,'')} # mailinglist name

    # Subject
    subject = nil
    m.subject.do{|s|
      subject = s.to_utf8.gsub(/\[[^\]]+\]/){|l|
        yield e, Label, l[1..-2]; nil} # emit []-wrapped tokens as RDF labels
      yield e, Title, subject}

    # Date
    date = m.date || Time.now rescue Time.now
    date = date.to_time.utc
    dstr = date.iso8601
    yield e, Date, dstr
    dpath = '/' + dstr[0..6].gsub('-','/') + '/msg/' # month
    [*from,*to].map{|addr| # addresses
      user, domain = addr.split '@'
      if user && domain
        apath = dpath + domain + '/' + user # address
        yield e, (from.member? addr) ? Creator : To, R[apath+'#'+user]
        if subject
          slug = R.tokens(subject).join('.')[0..63]
          mpath = apath + '.' + dstr[8..-1].gsub(/[^0-9]+/,'.') + slug # time & subject
          mpath = mpath + (mpath[-1] == '.' ? '' : '.')  + 'msg' # file-type extension
          mdir = '../.mail/' + domain + '/' # maildir
          %w{cur new tmp}.map{|c| R[mdir + c].mkdir} # maildir container
          mloc = R[mdir + 'cur/' + id.sha2 + '.msg'] # maildir entry
          iloc = mpath.R # index entry
          [iloc,mloc].map{|loc| loc.dir.mkdir # container
            ln self, loc unless loc.e rescue nil} # link
        end
      end
    }

    %w{in_reply_to references}.map{|ref|
      m.send(ref).do{|rs|
        rs.justArray.map{|r|
          dest = MessageURI[r]
          yield e, SIOC+'reply_of', dest
          destDir = dest.path.R; destDir.mkdir; destFile = destDir+'this.msg'
          # bidirectional reference link
          rev = destDir + id.sha2 + '.msg'
          rel = srcDir + r.sha2 + '.msg'
          if !rel.e # link missing
            if destFile.e # exists, create link
              ln destFile, rel rescue nil
            else # point to message anyway in case it appears
              ln_s destFile, rel rescue nil
            end
          end
          ln srcFile, rev if !rev.e rescue nil}}}
    # part handling
    htmlFiles, parts = m.all_parts.push(m).partition{|p|p.mime_type=='text/html'}
    htmlCount = 0
    htmlFiles.map{|p| # HTML file
      html = srcDir + "#{htmlCount}.html"  # file location
      yield e, DC+'hasFormat', html        # file pointer
      html.writeFile p.decoded  if !html.e # store HTML email
      htmlCount += 1 } # increment count
    parts.select{|p|
      (!p.mime_type || p.mime_type == 'text/plain') && # text parts
        Mail::Encodings.defined?(p.body.encoding)      # decodable?
    }.map{|p|
      yield e, Content, (H p.decoded.to_utf8.lines.to_a.map{|l| # split lines
        l = l.chomp # strip any remaining [\n\r]
        if qp = l.match(/^((\s*[>|]\s*)+)(.*)/) # quoted line
          depth = (qp[1].scan /[>|]/).size # > count
          if qp[3].empty? # drop blank quotes
            nil
          else # wrap quotes in <span>
            indent = "<span name='quote#{depth}'>&gt;</span>"
            {_: :span, class: :quote,
             c: [indent * depth,' ',
                 {_: :span, class: :quoted, c: qp[3].gsub('@','').hrefs{|p,o|yield e, p, o}}]}
          end
        else # fresh line
          [l.gsub(/(\w+)@(\w+)/,'\2\1').hrefs{|p,o|yield e, p, o}]
        end}.compact.intersperse("\n"))} # join lines
    parts.select{|p|p.mime_type=='message/rfc822'}.map{|m|
      content = m.body.decoded                   # decode message-part
      f = srcDir + content.sha2 + '.inlined.msg' # message location
      f.writeFile content if !f.e                # store message
      f.triplrMail &b}                           # recursion on message-part
    m.attachments.select{|p|Mail::Encodings.defined?(p.body.encoding)}.map{|p|
      name = p.filename.do{|f|f.to_utf8.do{|f|!f.empty? && f}} ||                           # explicit name
             (rand.to_s.sha2 + (Rack::Mime::MIME_TYPES.invert[p.mime_type] || '.bin').to_s) # generated name
      file = srcDir + name                     # file location
      file.writeFile p.body.decoded if !file.e # store
      yield e, SIOC+'attachment', file         # file pointer
      if p.main_type=='image'                  # image attachments
        yield e, Image, file                   # image link represented in RDF
        yield e, Content,                      # image link represented in HTML
          H({_: :a, href: file.uri, c: [{_: :img, src: file.uri}, p.filename]}) # render HTML
      end }
  end

  # Conditional GET of this feed URI. Caches body/ETag/Last-Modified under
  # /.cache/<sha2(uri)>/ and runs the indexer only when the body has changed.
  def fetchFeed
    head = {} # request header
    cache = R['/.cache/'+uri.sha2+'/'] # storage
    etag = cache + 'etag'      # cache etag URI
    priorEtag = nil            # cache etag value
    mtime = cache + 'mtime'    # cache mtime URI
    priorMtime = nil           # cache mtime value
    body = cache + 'body.atom' # cache body URI
    if etag.e # prefer ETag validation over Last-Modified
      priorEtag = etag.readFile
      head["If-None-Match"] = priorEtag unless priorEtag.empty?
    elsif mtime.e
      priorMtime = mtime.readFile.to_time
      head["If-Modified-Since"] = priorMtime.httpdate
    end
    begin # conditional GET
      open(uri, head) do |response| # open-uri Kernel#open; NOTE(review): URI.open is the preferred spelling on newer Rubies
        curEtag = response.meta['etag']
        curMtime = response.last_modified || Time.now rescue Time.now
        etag.writeFile curEtag if curEtag && !curEtag.empty? && curEtag != priorEtag # new ETag value
        mtime.writeFile curMtime.iso8601 if curMtime != priorMtime # new Last-Modified value
        resp = response.read
        unless body.e && body.readFile == resp # first fetch, or body changed
          body.writeFile resp # new cached body
          ('file:'+body.pathPOSIX).R.indexFeed :format => :feed, :base_uri => uri # run indexer
        end
      end
    rescue OpenURI::HTTPError => error
      msg = error.message
      puts [uri,msg].join("\t") unless msg.match(/304/) # 304 Not Modified is the expected "no change" reply
    end
  rescue Exception => e # NOTE(review): Exception also swallows SystemExit/Interrupt; StandardError is usually enough
    puts uri, e.class, e.message
  end
  # fetch every feed listed by #uris
  def fetchFeeds; uris.map{|u| u.R.fetchFeed} end
  # feed URIs advertised by an HTML document via <link rel=alternate>
  def feeds; (nokogiri.css 'link[rel=alternate]').map{|l| join l.attr :href} end
  alias_method :getFeed, :fetchFeed

  # Load a feed document into an RDF repository and persist each post's named
  # graph as a timestamped Turtle document, skipping posts already on disk.
  def indexFeed options = {}
    g = RDF::Repository.load self, options
    g.each_graph.map{|graph|
      graph.query(RDF::Query::Pattern.new(:s,R[R::Date],:o)).first_value.do{|t| # find timestamp
        time = t.gsub(/[-T]/,'/').sub(':','/').sub /(.00.00|Z)$/, '' # ISO8601 -> Y/m/d/H path prefix
        slug = (graph.name.to_s.sub(/https?:\/\//,'.').gsub(/[\W_]/,'..').sub(/\d{12,}/,'')+'.').gsub(/\.+/,'.')[0..127].sub(/\.$/,'') # graph URI -> filename-safe slug, capped at 128 chars
        doc =  R["/#{time}#{slug}.ttl"]
        unless doc.e # not yet stored
          doc.dir.mkdir
          cacheBase = doc.stripDoc
          graph << RDF::Statement.new(graph.name, R[DC+'cache'], cacheBase) # record cache location in the graph itself
          RDF::Writer.open(doc.pathPOSIX){|f|f << graph}
          puts cacheBase
        end
        true}}
    self
  rescue Exception => e # NOTE(review): rescuing Exception hides even SystemExit/Interrupt; consider StandardError
    puts uri, e.class, e.message
  end

  # Reader for JSON-cache format
  module Format
    class Format < RDF::Format
      content_type     'application/json+rdf', :extension => :e
      content_encoding 'utf-8'
      reader { R::Format::Reader }
    end
    class Reader < RDF::Reader
      format Format
      def initialize(input = $stdin, options = {}, &block)
        @graph = JSON.parse (input.respond_to?(:read) ? input : StringIO.new(input.to_s)).read
        @base = options[:base_uri]
        if block_given?
          case block.arity
          when 0 then instance_eval(&block)
          else block.call(self)
          end
        end
        nil
      end
      def each_statement &fn
        @graph.map{|s,r|
          r.map{|p,o|
            o.justArray.map{|o|
              fn.call RDF::Statement.new(@base.join(s), RDF::URI(p),
                        o.class==Hash ? @base.join(o['uri']) : (l = RDF::Literal o
                                                              l.datatype=RDF.XMLLiteral if p == Content
                                                              l))} unless p=='uri'}}
      end
      def each_triple &block; each_statement{|s| block.call *s.to_triple} end
    end
  end

  # Reader for Atom and RSS
  module Feed
    class Format < RDF::Format
      content_type     'application/atom+xml', :extension => :atom
      content_encoding 'utf-8'
      reader { R::Feed::Reader }
    end
    class Reader < RDF::Reader
      format Format
      def initialize(input = $stdin, options = {}, &block)
        @doc = (input.respond_to?(:read) ? input : StringIO.new(input.to_s)).read.utf8
        @base = options[:base_uri]
        if block_given?
          case block.arity
          when 0 then instance_eval(&block)
          else block.call(self)
          end
        end
        nil
      end
      def each_triple &block; each_statement{|s| block.call *s.to_triple} end
      def each_statement &fn # triples flow (left ← right)
        resolveURIs(:normalizeDates, :normalizePredicates,:rawTriples){|s,p,o|
          fn.call RDF::Statement.new(s.R, p.R,
                                     (o.class == R || o.class == RDF::URI) ? o : (l = RDF::Literal (if p == Content
                                                                             R::StripHTML[o]
                                                                           else
                                                                             o.gsub(/<[^>]*>/,' ')
                                                                           end)
                                                         l.datatype=RDF.XMLLiteral if p == Content
                                                         l), :graph_name => s.R)}
      end
      def resolveURIs *f
        send(*f){|s,p,o|
          if p==Content && o.class==String
            content = Nokogiri::HTML.fragment o
            content.css('img').map{|i|
              (i.attr 'src').do{|src|
                yield s, Image, src.R }}
            content.css('a').map{|a|
              (a.attr 'href').do{|href|
                link = s.R.join href
                a.set_attribute 'href', link
                yield s, DC+'link', link
                yield s, Image, link if %w{gif jpg png webp}.member? link.R.ext.downcase
              }}
            yield s, p, content.to_xhtml
          else
            yield s, p, o
          end
        }
      end
      def normalizePredicates *f
        send(*f){|s,p,o|
          yield s,
                {Atom+'content' => Content,
                 Atom+'displaycategories' => Label,
                 Atom+'enclosure' => SIOC+'attachment',
                 Atom+'summary' => Content,
                 Atom+'title' => Title,
                 DCe+'subject' => Title,
                 DCe+'type' => Type,
                 Harvard+'WPID' => Label,
                 Harvard+'affiliation' => Creator,
                 Harvard+'author' => Creator,
                 Harvard+'subtitle' => Title,
                 Podcast+'author' => Creator,
                 Podcast+'keywords' => Label,
                 Podcast+'subtitle' => Title,
                 RSS+'category' => Label,
                 RSS+'description' => Content,
                 RSS+'encoded' => Content,
                 RSS+'modules/content/encoded' => Content,
                 RSS+'modules/slash/comments' => SIOC+'num_replies',
                 RSS+'source' => DC+'source',
                 RSS+'title' => Title,
                }[p]||p, o }
      end
      def normalizeDates *f
        send(*f){|s,p,o|
          yield *({'CreationDate' => true,
                    'Date' => true,
                    RSS+'pubDate' => true,
                    Date => true,
                    DCe+'date' => true,
                    Atom+'published' => true,
                    Atom+'updated' => true
                  }[p] ?
                  [s,Date,Time.parse(o).utc.iso8601] : [s,p,o])}
      end
      def rawTriples
        # elements
        reHead = /<(rdf|rss|feed)([^>]+)/i
        reXMLns = /xmlns:?([a-z0-9]+)?=["']?([^'">\s]+)/
        reItem = %r{<(?<ns>rss:|atom:)?(?<tag>item|entry)(?<attrs>[\s][^>]*)?>(?<inner>.*?)</\k<ns>?\k<tag>>}mi
        reElement = %r{<([a-z0-9]+:)?([a-z]+)([\s][^>]*)?>(.*?)</\1?\2>}mi
        # identifiers
        reRDF = /about=["']?([^'">\s]+)/              # RDF @about
        reLink = /<link>([^<]+)/                      # <link> element
        reLinkCData = /<link><\!\[CDATA\[([^\]]+)/    # <link> CDATA block
        reLinkHref = /<link[^>]+rel=["']?alternate["']?[^>]+href=["']?([^'">\s]+)/ # <link> @href @rel=alternate
        reLinkRel = /<link[^>]+href=["']?([^'">\s]+)/ # <link> @href
        reId = /<(?:gu)?id[^>]*>([^<]+)/              # <id> element
        # media links
        reAttach = %r{<(link|enclosure|media)([^>]+)>}mi
        reSrc = /(href|url|src)=['"]?([^'">\s]+)/
        reRel = /rel=['"]?([^'">\s]+)/
        # XML namespaces
        x = {}
        head = @doc.match(reHead)
        head && head[2] && head[2].scan(reXMLns){|m|
          prefix = m[0]
          base = m[1]
          base = base + '#' unless %w{/ #}.member? base [-1]
          x[prefix] = base}
        @doc.scan(reItem){|m|
          attrs = m[2]
          inner = m[3]
          # find post identifier
          u = (attrs.do{|a|a.match(reRDF)} || inner.match(reLink) || inner.match(reLinkCData) || inner.match(reLinkHref) || inner.match(reLinkRel) || inner.match(reId)).do{|s|s[1]}
          if u
            u = (URI.join @base, u).to_s unless u.match /^http/
            resource = u.R
            yield u, Type, R[SIOC+'BlogPost']
            blogs = [resource.join('/')]
            blogs.push @base.R.join('/') if @base.R.host != resource.host
            blogs.map{|blog| yield u, R::To, blog}
            # links
            inner.scan(reAttach){|e|
              e[1].match(reSrc).do{|url|
                rel = e[1].match reRel
                if rel
                  o = url[2].R
                  p = case o.ext.downcase
                      when 'jpg'
                        R::Image
                      when 'png'
                        R::Image
                      else
                        R::Atom + rel[1]
                      end
                  yield u, p, o
                end}}
            # XML elements
            inner.scan(reElement){|e|
              p = (x[e[0] && e[0].chop]||R::RSS) + e[1] # namespaced attribute-names
              if [Atom+'id',RSS+'link',RSS+'guid',Atom+'link'].member? p
                # used in subject URI search
              elsif [Atom+'author', RSS+'author', RSS+'creator', DCe+'creator'].member? p
                uri = e[3].match /<uri>([^<]+)</
                name = e[3].match /<name>([^<]+)</
                yield u, Creator, e[3].do{|o|o.match(/\A(\/|http)[\S]+\Z/) ? o.R : o } unless name||uri
                yield u, Creator, name[1] if name
                yield u, Creator, uri[1].R if uri
              else # generic element
                yield u,p,e[3].do{|o|
                  case o
                  when /^\s*<\!\[CDATA/m
                    o.sub /^\s*<\!\[CDATA\[(.*?)\]\]>\s*$/m,'\1'
                  when /</m
                    o
                  else
                    CGI.unescapeHTML o
                  end
                }.do{|o|o.match(/\A(\/|http)[\S]+\Z/) ? o.R : o }
              end
            }
          end}
      end
    end
  end

  # Atom serializer: graph d ({uri => resource data}) + request resource e -> XML string
  FEED = -> d,e {
    H(['<?xml version="1.0" encoding="utf-8"?>',
       {_: :feed,xmlns: 'http://www.w3.org/2005/Atom',
         c: [{_: :id, c: e.uri},
             {_: :title, c: "Atom feed for " + e.uri},
             {_: :link, rel: :self, href: e.uri},
             {_: :updated, c: Time.now.iso8601},
             d.map{|u,d| # one <entry> per resource
               {_: :entry,
                 c: [{_: :id, c: u}, {_: :link, href: u},
                     d[Date].do{|d|   {_: :updated, c: d[0]}},
                     d[Title].do{|t|  {_: :title,   c: t}},
                     d[Creator].do{|c|{_: :author,  c: c[0]}},
                     {_: :content, type: :xhtml,
                       c: {xmlns:"http://www.w3.org/1999/xhtml",
                           c: d[Content]}}]}}]}])}
end

class String
  # string -> resource (treat the string as a URI)
  def R; R.new self end
  # scan for HTTP URIs in string. example:
  # demo on the site (https://demohere) and source-code at https://sourcehere.
  # [,.] only match mid-URI, opening ( required for ) capture, <> wrapping is stripped
  # Returns HTML with each URI hyperlinked (images inlined); the optional block
  # receives (predicate, resource) for each discovered link, for RDF emission.
  def hrefs &b
    pre,link,post = self.partition(/(https?:\/\/(\([^)>\s]*\)|[,.]\S|[^\s),.”\'\"<>\]])+)/)
    u = link.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;') # escaped URI
    pre.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;') +    # escaped pre-match
      (link.empty? && '' || '<a href="' + u + '">' + # hyperlink
       (if u.match(/(gif|jpg|jpeg|jpg:large|png|webp)$/i) # image?
        yield(R::Image,u.R) if b # image RDF
        "<img src='#{u}'/>"      # inline image
       else
         yield(R::DC+'link',u.R) if b # link RDF
         u.sub(/^https?.../,'')  # inline text
        end) + '</a>') +
      (post.empty? && '' || post.hrefs(&b)) # recursion on post-capture tail
  end
  def sha2; Digest::SHA2.hexdigest self end # hex SHA-256 digest
  def to_utf8; encode('UTF-8', undef: :replace, invalid: :replace, replace: '?') end # lossy transcode; bad bytes become '?'
  def utf8; force_encoding 'UTF-8' end # relabel encoding without transcoding
  def sh; Shellwords.escape self end   # shell-argument escaping
end

module Redcarpet
  module Render
    # Markdown renderer whose fenced code blocks are highlighted by Pygments.
    class Pygment < HTML
      # Shell out to `pygmentize` to produce highlighted HTML for a code block;
      # without a language tag the code is returned verbatim.
      def block_code(code, lang)
        return code unless lang
        IO.popen("pygmentize -l #{lang.downcase.sh} -f html", 'r+') do |pipe|
          pipe.puts code
          pipe.close_write
          pipe.read
        end
      end
    end
  end
end
2017-11-23T02:45:49+00:00 34325
/pw/ruby/proprietary.rb
class R

  # base URL of the (server-rendered) twitter.com frontend
  Twitter = 'https://twitter.com'

  # Scrape tweets out of a fetched twitter.com HTML page, yielding
  # (subject, predicate, object) triples to the caller's block.
  def fetchTweets
    nokogiri.css('div.tweet > div.content').map{|t|
      s = Twitter + t.css('.js-permalink').attr('href') # permalink -> subject URI
      authorName = t.css('.username b')[0].inner_text
      author = R[Twitter + '/' + authorName]
      ts = Time.at(t.css('[data-time]')[0].attr('data-time').to_i).iso8601 # epoch seconds -> ISO8601
      yield s, Type, R[SIOC+'Tweet']
      yield s, Date, ts
      yield s, Creator, author
      yield s, To, (Twitter + '/#twitter').R
      yield s, Label, authorName
      content = t.css('.tweet-text')[0]
      content.css('a').map{|a|
        a.set_attribute('href', Twitter + (a.attr 'href')) if (a.attr 'href').match /^\// # absolutize site-relative hrefs
        yield s, DC+'link', R[a.attr 'href']}
      yield s, Abstract, StripHTML[content.inner_html].gsub(/<\/?span[^>]*>/,'').gsub(/\n/,'').gsub(/\s+/,' ')}
  end
  # Crawl tweets into per-tweet JSON-cache documents ('.e', read by R::Format).
  def indexTweets
    graph = {}
    # build graph
    fetchTweets{|s,p,o|
      graph[s] ||= {'uri'=>s}
      graph[s][p] ||= []
      graph[s][p].push o}
    # serialize tweets to file(s)
    graph.map{|u,r|
      r[Date].do{|t|
        slug = (u.sub(/https?/,'.').gsub(/\W/,'.')).gsub /\.+/,'.'
        time = t[0].to_s.gsub(/[-T]/,'/').sub(':','/').sub /(.00.00|Z)$/, '' # timestamp -> Y/m/d/H path prefix
        doc = "/#{time}#{slug}.e".R
        unless doc.e # skip tweets already indexed
          puts u
          doc.writeFile({u => r}.to_json)
        end}}
  end
  # Read a username list from this file and index their tweets,
  # batching 22 users per search-page request.
  def twitter
    open(pathPOSIX).readlines.map(&:chomp).shuffle.each_slice(22){|s|
      readURI = Twitter + '/search?f=tweets&vertical=default&q=' + s.map{|u|'from:'+u.chomp}.intersperse('+OR+').join
      readURI.R.indexTweets}
  end

end
2017-11-23T02:43:08+00:00 1591
/pw/ruby/ww.rb
# coding: utf-8
%w{cgi csv date digest/sha2 dimensions fileutils json linkeddata mail nokogiri open-uri pathname rack rdf redcarpet shellwords}.map{|r|require r}
# derive resource class
class R < RDF::URI
  def R; self end             # an R is already a resource: identity coercion
  def R.[] uri; R.new uri end # constructor shorthand: R[uri]
end
# now the rest of the library can reopen R
%w{MIME HTML HTTP proprietary}.map{|r|require_relative r}
# #justArray returns one-element array for singleton object. obviates [] wrapping of RDF-object when construction Hash or JSON
# #R normalizes any type identifiable with a URI to our abstract resource
# #do passes object to block-arg. Kernel#yield_self in Ruby 2.5 may be faster than "yield self", TODO investigate once widely deployed
class Array
  # arrays pass through unchanged: [a] -> [a]
  def justArray; self end
  # place separator i between consecutive elements: [a,b].intersperse(x) -> [a,x,b]
  def intersperse i
    out = []
    each{|element| out << element << i}
    out[0..-2]
  end
end
class FalseClass
  # false.do{..} skips the block and stays false (mirrors NilClass#do)
  def do; false end
end
class Hash
  # JSON-graph node -> resource (the identifier lives under the "uri" key)
  def R; R.new self["uri"] end
  # identifier of a JSON-graph node
  def uri; self["uri"] end
  # RDF types of the node, as bare URI strings
  def types
    self[R::Type].justArray.select{|type| type.respond_to? :uri}.map(&:uri)
  end
end
class NilClass
  # nil contributes no elements: nil -> []
  def justArray; [] end
  # nil.do{..} skips the block and stays nil
  def do; nil end
end
class Object
  # wrap any value in a one-element array: a -> [a]
  def justArray; [self] end
  # identity
  def id; self end
  # pass self to the block argument (precursor of Kernel#yield_self / #then)
  def do; yield self end
  # coerce to a time value; Time/DateTime instances pass through unchanged
  def to_time
    [Time, DateTime].include?(self.class) ? self : Time.parse(self)
  end
end
class Pathname
  # filesystem path -> resource, with bytes relabeled as UTF-8
  def R; R.fromPOSIX to_s.utf8 end
end
class RDF::Node
  # blank node -> resource, via its string form
  def R; R.new to_s end
end
class RDF::URI
  # plain RDF URI -> resource subclass
  def R; R.new to_s end
end
2017-11-22T10:28:11+00:00 1411
/pw/ruby/Gemfile
# Bundler requires an absolute source URL; the bare "rubygems.org/" form is invalid
source "https://rubygems.org"
gem 'dimensions'
gem 'foreman'
gem 'icalendar'
gem 'linkeddata'
gem 'mail'
gem 'nokogiri'
gem 'nokogiri-diff'
gem 'pry'
gem 'pry-doc'
gem 'rack'
gem 'redcarpet'
gem 'thin'
gem 'unicorn'
2017-11-22T02:00:12+00:00
/pw/ruby/HTTP.rb
# coding: utf-8
class R
  def env; @r end                # Rack environment of the current request
  def setEnv r; @r = r; self end # bind request environment, return self
  # Rack application entrypoint: route the request to a resource instance
  def R.call e
    return [404,{},[]] if e['REQUEST_PATH'].match(/\.php$/i)
    return [405,{},[]] unless %w{HEAD GET}.member? e['REQUEST_METHOD']
    rawpath = e['REQUEST_PATH'].utf8.gsub /[\/]+/, '/'   # /-collapse
    path = Pathname.new(rawpath).expand_path.to_s        # evaluate path
    path += '/' if path[-1] != '/' && rawpath[-1] == '/' # preserve trailing-slash
    resource = path.R; e['uri'] = resource.uri           # resource URI
    e[:Response]={}; e[:Links]={}                        # header fields
    resource.setEnv(e).send e['REQUEST_METHOD']          # call resource
  rescue Exception => x # render any error as an HTML 500 page
    msg = [x.class,x.message,x.backtrace].join "\n"
    [500,{'Content-Type' => 'text/html'},
     ["<html><head><style>body {background-color:#222; font-size:1.2em; text-align:center}\npre {text-align:left; display:inline-block; background-color:#000; color:#fff; font-weight:bold; border-radius:.6em; padding:1em}\n.number {color:#0f0; font-weight:normal; font-size:1.1em}</style></head><body><pre>",
      msg.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;').gsub(/([0-9\.]+)/,'<span class=number>\1</span>'),
      '</pre></body></html>']]
  end
  # HEAD = GET minus the response body
  def HEAD; self.GET.do{|s,h,b|[s,h,[]]} end
  def GET
    parts = path[1..-1].split '/'
    firstPart = parts[0] || ''
    directory = node.directory?
    return file if node.file?          # static file
    return feed if parts[0] == 'feed'  # feed redirect
    return (chrono parts) if firstPart.match(/^(y(ear)?|m(onth)?|d(ay)?|h(our)?)$/i) # current-period redirect
    return [204,{},[]] if firstPart.match(/^gen.*204$/) # connectivity-check endpoints
    return [302,{'Location' => path+'/'+qs},[]] if directory && path[-1]!='/'
    # derive prev/next navigation targets for date-shaped (/Y/m/d/H) path prefixes
    dp = []
    dp.push parts.shift.to_i while parts[0] && parts[0].match(/^[0-9]+$/)
    n = nil; p = nil
    case dp.length
    when 1 # Y
      year = dp[0]
      n = '/' + (year + 1).to_s
      p = '/' + (year - 1).to_s
    when 2 # Y-m
      year = dp[0]
      m = dp[1]
      n = m >= 12 ? "/#{year + 1}/#{01}" : "/#{year}/#{'%02d' % (m + 1)}" # NOTE(review): "#{01}" renders "1", not zero-padded "01" like the other branches — December's next-link may not match stored /YYYY/MM paths
      p = m <=  1 ? "/#{year - 1}/#{12}" : "/#{year}/#{'%02d' % (m - 1)}"
    when 3 # Y-m-d
      day = ::Date.parse "#{dp[0]}-#{dp[1]}-#{dp[2]}" rescue nil
      if day
        p = (day-1).strftime('/%Y/%m/%d')
        n = (day+1).strftime('/%Y/%m/%d')
      end
    when 4 # Y-m-d-H
      day = ::Date.parse "#{dp[0]}-#{dp[1]}-#{dp[2]}" rescue nil
      if day
        hour = dp[3]
        p = hour <=  0 ? (day - 1).strftime('/%Y/%m/%d/23') : (day.strftime('/%Y/%m/%d/')+('%02d' % (hour-1)))
        n = hour >= 23 ? (day + 1).strftime('/%Y/%m/%d/00') : (day.strftime('/%Y/%m/%d/')+('%02d' % (hour+1)))
      end
    end
    sl = parts.empty? ? '' : (path[-1] == '/' ? '/' : '') # preserve trailing slash on suffix
    @r[:Links][:prev] = p + '/' + parts.join('/') + sl + qs + '#prev' if p && R[p].e
    @r[:Links][:next] = n + '/' + parts.join('/') + sl + qs + '#next' if n && R[n].e
    @r[:Links][:up] = dirname + (dirname == '/' ? '' : '/') + qs
    if q.has_key? 'head'
      qq = q.dup; qq.delete 'head'
      @r[:Links][:down] = path + (R.qs qq)
    end
    # select the node-set to render
    set = (if directory
           if q.has_key?('f') && path!='/' # FIND(1) nodes
             found = find q['f']
             q['head'] = true if found.size > 127
             found
           elsif q.has_key?('q') && path!='/' # GREP(1) nodes
             grep q['q']
           else
             if uri[-1] == '/' # inside
               (self+'index.{html,ttl}').glob || [self, children] # contained nodes
             else # outside
               @r[:Links][:down] = path + '/' + qs
               self # just container
             end
           end
          else
            (match(/\*/) ? self : (self+'.*')).glob # documents
           end).justArray.flatten.compact.select &:exist?
    return notfound if !set || set.empty?
    @r[:Response].update({'Link' => @r[:Links].map{|type,uri|"<#{uri}>; rel=#{type}"}.intersperse(', ').join}) unless @r[:Links].empty?
    @r[:Response].update({'Content-Type' => format, 'ETag' => [set.sort.map{|r|[r,r.m]}, format].join.sha2}) # ETag from node list + mtimes
    condResponse ->{ # body
      if set.size == 1 && set[0].mime == format
        set[0] # static body
      else # dynamic body
        if format == 'text/html'
          HTML[(load set),self]
        elsif format == 'application/atom+xml'
          FEED[(load set),self]
        else # RDF
          (loadRDF set).dump (RDF::Writer.for :content_type => format).to_sym, :base_uri => self, :standard_prefixes => true
        end
      end}
  end
  # redirect /feed to the current hour's feed view
  def feed; [303,@r[:Response].update({'Location'=> Time.now.strftime('/%Y/%m/%d/%H/?feed')}),[]] end
  # redirect /y /m /d /h shorthands to the current period's path
  def chrono ps
    time = Time.now
    loc = time.strftime(case ps[0][0].downcase
                        when 'y'
                          '%Y'
                        when 'm'
                          '%Y/%m'
                        when 'd'
                          '%Y/%m/%d'
                        when 'h'
                          '%Y/%m/%d/%H'
                        else
                        end)
    [303,@r[:Response].update({'Location' => '/' + loc + '/' + ps[1..-1].join('/') + (qs.empty? ? '?head' : qs)}),[]]
  end
  # serve a static file, with optional thumbnail previews
  def file
    @r[:Response].update({'Content-Type' => mime, 'ETag' => [m,size].join.sha2})
    @r[:Response].update({'Cache-Control' => 'no-transform'}) if mime.match /^(audio|image|video)/
    if q.has_key?('preview') && ext.match(/(mp4|mkv|png|jpg)/i)
      filePreview
    else
      condResponse
    end
  end
  # serve (generating on first request) a 256px thumbnail of an image/video file
  def filePreview
    p = join('.' + basename + '.jpg').R
    if !p.e # generate thumbnail
      if mime.match(/^video/)
        `ffmpegthumbnailer -s 256 -i #{sh} -o #{p.sh}`
      else
        `gm convert #{sh} -thumbnail "256x256" #{p.sh}`
      end
    end
    p.e && p.setEnv(@r).condResponse || notfound
  end
  # full-text search under this directory via grep(1), piping multi-word queries through xargs
  def grep q
    words = R.tokens q
    case words.size
    when 0
      return []
    when 2 # unordered
      cmd = "grep -rilZ #{words[0].sh} #{sh} | xargs -0 grep -il #{words[1].sh}"
    when 3
      cmd = "grep -rilZ #{words[0].sh} #{sh} | xargs -0 grep -ilZ #{words[1].sh} | xargs -0 grep -il #{words[2].sh}"
    when 4
      cmd = "grep -rilZ #{words[0].sh} #{sh} | xargs -0 grep -ilZ #{words[1].sh} | xargs -0 grep -ilZ #{words[2].sh} | xargs -0 grep -il #{words[3].sh}"
    else
      pattern = words.join '.*'
      cmd = "grep -ril #{pattern.sh} #{sh}"
    end
    `#{cmd} | head -n 255`.lines.map{|pathName|
      R.fromPOSIX pathName.chomp}
  end
  # conditional response: 304 on ETag match, else serve file-ref or generated body
  def condResponse body=nil
    etags = @r['HTTP_IF_NONE_MATCH'].do{|m| m.strip.split /\s*,\s*/ }
    if etags && (etags.include? @r[:Response]['ETag'])
      [304, {}, []]
    else
      body = body ? body.call : self
      if body.class == R # file-ref
        (Rack::File.new nil).serving((Rack::Request.new @r),body.pathPOSIX).do{|s,h,b|[s,h.update(@r[:Response]),b]}
      else
        [(@r[:Status]||200), @r[:Response], [body]]
      end
    end
  end
  def notfound; [404,{'Content-Type' => 'text/html'},[HTML[{},self]]] end
  def qs; @qs ||= (@r['QUERY_STRING'] && !@r['QUERY_STRING'].empty? && ('?' + @r['QUERY_STRING']) || '') end # qs
  def R.qs h; '?'+h.map{|k,v|k.to_s + '=' + (v ? (CGI.escape [*v][0].to_s) : '')}.intersperse("&").join('') end # Hash -> qs
  def q # qs -> Hash
    @q ||= # memoize
      (if q = @r['QUERY_STRING']
       h = {}
       q.split(/&/).map{|e|
         k, v = e.split(/=/,2).map{|x|CGI.unescape x}
         h[(k||'').downcase] = v}
       h
      else
        {}
       end)
  end
  def format; @format ||= selectFormat end
  # content negotiation: pick the highest-q Accept entry we can serialize
  def selectFormat
    return 'application/atom+xml' if q.has_key?('feed')
    (d={}
     @r['HTTP_ACCEPT'].do{|k|
       (k.split /,/).map{|e| # MIME/q-val pairs
         f,q = e.split /;/   # split pair
         i = q && q.split(/=/)[1].to_f || 1.0
         d[i] ||= []; d[i].push f.strip}} # index q-val
     d).sort.reverse.map{|q,formats| # ordered index
      formats.map{|mime| #serializable?
        return mime if RDF::Writer.for(:content_type => mime) || Writable.member?(mime)}}
    'text/html' # default
  end
end
2017-11-20T09:59:25+00:00 8000
/pw/ruby/install 2017-08-22T22:21:39+00:00 222
/pw/ruby/Gemfile.lock
GEM
  remote: https://rubygems.org/
  specs:
    addressable (2.5.1)
      public_suffix (~> 2.0, >= 2.0.2)
    bcp47 (0.3.3)
      i18n
    builder (3.2.3)
    coderay (1.1.1)
    concurrent-ruby (1.0.5)
    daemons (1.2.4)
    dimensions (1.3.0)
    ebnf (1.1.1)
      rdf (~> 2.2)
      sxp (~> 1.0)
    equivalent-xml (0.6.0)
      nokogiri (>= 1.4.3)
    eventmachine (1.2.3)
    foreman (0.84.0)
      thor (~> 0.19.1)
    haml (5.0.1)
      temple (>= 0.8.0)
      tilt
    hamster (3.0.0)
      concurrent-ruby (~> 1.0)
    htmlentities (4.3.4)
    i18n (0.8.6)
    json-ld (2.1.5)
      multi_json (~> 1.12)
      rdf (~> 2.2)
    kgio (2.11.0)
    ld-patch (0.3.1)
      ebnf (~> 1.0, >= 1.0.1)
      rdf (~> 2.0)
      rdf-xsd (~> 2.0)
      sparql (~> 2.0)
      sxp (~> 1.0)
    link_header (0.0.8)
    linkeddata (2.2.2)
      equivalent-xml (~> 0.6)
      json-ld (~> 2.1)
      ld-patch (~> 0.3)
      nokogiri (~> 1.7)
      rdf (~> 2.2)
      rdf-aggregate-repo (~> 2.1)
      rdf-isomorphic (~> 2.0)
      rdf-json (~> 2.0)
      rdf-microdata (~> 2.1)
      rdf-n3 (~> 2.1)
      rdf-normalize (~> 0.3)
      rdf-rdfa (~> 2.1)
      rdf-rdfxml (~> 2.0)
      rdf-reasoner (~> 0.4)
      rdf-tabular (~> 2.2)
      rdf-trig (~> 2.0)
      rdf-trix (~> 2.0)
      rdf-turtle (~> 2.2)
      rdf-vocab (~> 2.1)
      rdf-xsd (~> 2.1)
      sparql (~> 2.1)
      sparql-client (~> 2.1)
    mail (2.6.6)
      mime-types (>= 1.16, < 4)
    method_source (0.8.2)
    mime-types (3.1)
      mime-types-data (~> 3.2015)
    mime-types-data (3.2016.0521)
    mini_portile2 (2.2.0)
    multi_json (1.12.1)
    net-http-persistent (2.9.4)
    nokogiri (1.8.0)
      mini_portile2 (~> 2.2.0)
    nokogiri-diff (0.2.0)
      nokogiri (~> 1.5)
      tdiff (~> 0.3, >= 0.3.2)
    pry (0.10.4)
      coderay (~> 1.1.0)
      method_source (~> 0.8.1)
      slop (~> 3.4)
    pry-doc (0.10.0)
      pry (~> 0.9)
      yard (~> 0.9)
    public_suffix (2.0.5)
    rack (2.0.3)
    raindrops (0.18.0)
    rdf (2.2.6)
      hamster (~> 3.0)
      link_header (~> 0.0, >= 0.0.8)
    rdf-aggregate-repo (2.2.0)
      rdf (~> 2.0)
    rdf-isomorphic (2.0.0)
      rdf (~> 2.0)
    rdf-json (2.0.0)
      rdf (~> 2.0)
    rdf-microdata (2.2.1)
      htmlentities (~> 4.3)
      nokogiri (~> 1.7)
      rdf (~> 2.2)
      rdf-xsd (~> 2.1)
    rdf-n3 (2.1.0)
      rdf (~> 2.0)
    rdf-normalize (0.3.2)
      rdf (~> 2.0)
    rdf-rdfa (2.2.2)
      haml (~> 5.0)
      htmlentities (~> 4.3)
      rdf (~> 2.2)
      rdf-aggregate-repo (~> 2.2)
      rdf-xsd (~> 2.1)
    rdf-rdfxml (2.0.0)
      htmlentities (~> 4.3)
      rdf (~> 2.0)
      rdf-rdfa (~> 2.0)
      rdf-xsd (~> 2.0)
    rdf-reasoner (0.4.2)
      rdf (~> 2.2)
      rdf-vocab (~> 2.2)
      rdf-xsd (~> 2.1)
    rdf-tabular (2.2.0)
      addressable (~> 2.3)
      bcp47 (~> 0.3, >= 0.3.3)
      json-ld (~> 2.0)
      rdf (~> 2.1)
      rdf-vocab (~> 2.0)
      rdf-xsd (~> 2.0)
    rdf-trig (2.0.0)
      ebnf (~> 1.0, >= 1.0.1)
      rdf (~> 2.0)
      rdf-turtle (~> 2.0)
    rdf-trix (2.0.0)
      rdf (~> 2.0)
    rdf-turtle (2.2.0)
      ebnf (~> 1.1)
      rdf (~> 2.2)
    rdf-vocab (2.2.3)
      rdf (~> 2.2)
    rdf-xsd (2.2.0)
      rdf (~> 2.1)
    redcarpet (3.4.0)
    slop (3.6.0)
    sparql (2.2.1)
      builder (~> 3.2)
      ebnf (~> 1.1)
      rdf (~> 2.2)
      rdf-aggregate-repo (~> 2.2)
      rdf-xsd (~> 2.1)
      sparql-client (~> 2.1)
      sxp (~> 1.0)
    sparql-client (2.1.0)
      net-http-persistent (~> 2.9)
      rdf (~> 2.0)
    sxp (1.0.0)
      rdf (~> 2.0)
    tdiff (0.3.3)
    temple (0.8.0)
    thin (1.7.2)
      daemons (~> 1.0, >= 1.0.9)
      eventmachine (~> 1.0, >= 1.0.4)
      rack (>= 1, < 3)
    thor (0.19.4)
    tilt (2.0.7)
    unicorn (5.3.0)
      kgio (~> 2.6)
      raindrops (~> 0.7)
    yard (0.9.9)

PLATFORMS
  ruby

DEPENDENCIES
  dimensions
  foreman
  linkeddata
  mail
  nokogiri
  nokogiri-diff
  pry
  pry-doc
  rack
  redcarpet
  thin
  unicorn

BUNDLED WITH
   1.13.6
2017-07-18T20:54:07+00:00