etag in CLI

2016-01-28 23:46:21 +01:00 · 2016-01-28 23:46:21 +01:00 · d9e98f704a
parent e73af72e67
commit d9e98f704a
1 changed files with 71 additions and 43 deletions
--- a/cli/paperfind.sh
+++ b/cli/paperfind.sh
@ -23,18 +23,26 @@
 #   Result (WSN):
 #     OUTPUT   = "[" [ DOC_LIST ] "]" .
 #     DOC_LIST = DOC { "," DOC } .
-#     DOC      = "{""folder"":" FOLDER ",""labels"":[" [ LABELS ] "],""count"":" COUNT ",""type"":" TYPE "}" .
+#     DOC      = "{""folder"":" FOLDER ",""labels"":[" [ LABELS ]
 #                "],""count"":" COUNT ",""type"":" TYPE ",""etag"":" ETAG "}" .
 #     FOLDER   = json_string .
 #     LABELS   = json_string { "," json_string } .
 #     COUNT    = json_number .
 #     TYPE     = """pdf""" | """pages""" .
 #     ETAG     = json_string .
 #
 # Retrieve a single document's metadata:
 #   -M <date>
 #   Result (WSN):
 #     OUTPUT   = DOC .
 #
 # Retrieve a document's thumbnails:
 #   -T <date> : the folder-name of the document
 #   Result (WSN):
 #     OUTPUT   = "[" THUMBS "]" .
 #     THUMBS   = CONTENTS { "," CONTENTS } .
-#     CONTENTS = "{""mime"":" MIME ",""data"":" B64_DATA ",""width"":" WIDTH ",""height"":" HEIGHT "}" .
+#     CONTENTS = "{""mime"":" MIME ",""data"":" B64_DATA ",""width"":"
 #                WIDTH ",""height"":" HEIGHT ",""etag"":" ETAG "}" .
 #     MIME     = json_string .
 #     B64_DATA = json_string .
 #     WIDTH    = json_number .
@ -48,10 +56,11 @@
 #   document is a PDF file: then the whole PDF file is encoded.
 #   Width and height should be ignored for PDF contents.
 #
-# Retrieve a document's page's metadata without the actual page:
+# Retrieve a document page's metadata without the actual page:
 #   -M <date> -p <page number>
 #   Result (WSN):
-#     OUTPUT   = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":" HEIGHT "}" .
+#     OUTPUT   = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":"
 #                HEIGHT ",""etag"":" ETAG "}" .
 #   Width and height should be ignored for PDF contents.
 #
 # Retrieve a raw document's page/PDF without metadata:
@ -101,29 +110,70 @@ Q) for ((i=${#dates[*]}-1;i>=0;i--)); do [ ${#dates[i]} -ge 4 ] || unset dates[i
 T) [ -n "$doc" -a -d "$BASE/$doc" ] || exit 3 ;;
 D|M|R)
   [ -n "$doc" -a -d "$BASE/$doc" ] || exit 3
-   if [ -f "$BASE/$doc/doc.pdf" ]; then
+   if [ $mode != M -o -n "$page" ]; then
-     if [ -n "$pdfasjpg" ]; then
+     if [ -f "$BASE/$doc/doc.pdf" ]; then
-       maxp=$(pdfinfo "$BASE/$doc/doc.pdf" | awk '/^Pages:/{print $2}')
+       if [ -n "$pdfasjpg" ]; then
-       [ -n "$maxp" -a -n "$page" -a $page -gt 0 -a $page -le $maxp ] || exit 3
+         maxp=$(pdfinfo "$BASE/$doc/doc.pdf" | awk '/^Pages:/{print $2}')
         [ -n "$maxp" -a -n "$page" -a $page -gt 0 -a $page -le $maxp ] || exit 3
       fi
     else
       [ -f "$BASE/$doc/paper.$page.jpg" ] || exit 3
     fi
   else
     [ -f "$BASE/$doc/paper.$page.jpg" ] || exit 3
   fi ;;
 *) exit 1 ;;
 esac
 # RUN
 # &0: image data
 # &1: "<width> <height>"
 # The size is taken from the last ", <w>x<h>," field of the description that
 # file(1) prints for stdin; a description without such a field is passed
 # through unchanged.
 function image_wh() {
  local -r dims_re='.*, ([0-9]+)x([0-9]+),[^,]*$'
  file -b - | sed -r "s/${dims_re}/\1 \2/"
 }
 # &0: raw string
 # &1: json string
 # Escapes backslash, slash and double quote, plus tab, carriage return and
 # newline. Trailing newlines of the input are dropped (a here-string always
 # appends one). Other control characters are passed through unchanged.
 function json_string() {
  # The whole input is gathered in the pattern space first so that embedded
  # newlines can be escaped as well. Backslashes are escaped before any
  # other escape is inserted, so the inserted ones are not doubled.
  printf '"%s"' "$(sed ':a;$!{N;ba};s#[\\/"]#\\&#g;s#\t#\\t#g;s#\r#\\r#g;s#\n*$##;s#\n#\\n#g')"
 }
-# &0: image data
+# $1: folder name (relative path)
-# &1: "<width> <height>"
+# &1: json DOC
-function image_wh() {
+function json_doc() {
-  file -b - | sed -r 's/.*, ([0-9]+)x([0-9]+),[^,]*$/\1 \2/'
+  local type count labs lab nil etag
  # "pdf" when the folder holds a doc.pdf file, otherwise "pages"
  [ -f $1/doc.pdf ] && type=pdf || type=pages
  # number of paper.*.thumb.jpg files in the folder (0 when none match,
  # because the ls error is discarded)
  count=$(/bin/ls -1 $1/paper.*.thumb.jpg 2>/dev/null | wc -l)
  # first comma-separated field of every line of the labels file, in
  # "sort -df" order; each entry is emitted with a leading comma
  labs="$(
    while IFS=, read lab nil; do printf ','; json_string <<<"$lab"; done < <(sort -df $1/labels))"
  # etag is the modification time of the folder itself (-maxdepth 0), as
  # printed by find's %T@ directive
  etag=$(find $1 -maxdepth 0 -printf '%T@')
  # ${labs:1} drops the leading comma produced by the loop above
  printf '{"folder":%s,"labels":[%s],"count":%d,"type":"%s","etag":%s}' \
    "$(json_string <<<"$1")" "${labs:1}" $count "$type" "$(json_string <<<"$etag")"
 }
 # $1: file path
 #[$2: page number (if it must be extracted from a PDF)]
 #[$3: "nodata" (any non-empty value leaves out the "data" member)]
 # &1: json CONTENTS
 # Reads the globals pdfasjpg and PDF_DPI. Width and height are 0 for a PDF
 # that is not rendered to JPEG and whenever the size cannot be determined.
 function json_contents() {
  local mime w=0 h=0 etag
  # Command that produces the raw payload; replaced below when a single PDF
  # page has to be rendered as a JPEG.
  local -a cmd=(cat "$1")
  mime=$(file -bi "$1" | cut -d';' -f1)
  if [[ $mime != 'application/pdf' ]]; then
    read -r w h < <(image_wh <"$1")
  elif [[ -n $2 && -n $pdfasjpg ]]; then
    cmd=(pdftoppm -r "$PDF_DPI" -jpeg -f "$2" -l "$2" "$1")
    mime=image/jpeg
    read -r w h < <("${cmd[@]}" | image_wh)
  fi
  # image_wh yields nothing or non-numeric text when no size can be parsed.
  # Fall back to 0 so that every %d below receives exactly one integer and
  # the remaining printf arguments cannot shift.
  [[ $w =~ ^[0-9]+$ ]] || w=0
  [[ $h =~ ^[0-9]+$ ]] || h=0
  # modification time of the file itself, as printed by find's %T@ directive
  etag=$(find "$1" -maxdepth 0 -printf '%T@')
  if [[ -n $3 ]]; then
    printf '{"mime":%s,"width":%d,"height":%d,"etag":%s}' \
      "$(json_string <<<"$mime")" "$w" "$h" "$(json_string <<<"$etag")"
  else
    printf '{"mime":%s,"width":%d,"height":%d,"etag":%s,"data":%s}' \
      "$(json_string <<<"$mime")" "$w" "$h" "$(json_string <<<"$etag")" \
      "$("${cmd[@]}" | base64 --wrap=0 | json_string)"
  fi
 }
 cd "$BASE"
@ -167,12 +217,7 @@ Q)
  fi
  printf '['
  while IFS=/ read folder nil; do if [ -n "$folder" ]; then
-    [ -f $folder/doc.pdf ] && type=pdf || type=pages
+    printf ',%s' "$(json_doc $folder)"
    count=$(/bin/ls -1 $folder/paper.*.thumb.jpg 2>/dev/null | wc -l)
    labs="$(
      while IFS=, read lab nil; do printf ','; json_string <<<"$lab"; done < <(sort -df $folder/labels))"
    printf ',{"folder":%s,"labels":[%s],"count":%d,"type":"%s"}' \
      "$(json_string <<<"$folder")" "${labs:1}" $count "$type"
  fi; done < <(sort -r <<<"$found") | sed 's/^.//'
  printf ']'
  ;;
@ -180,34 +225,17 @@ T)
  printf '['
  /bin/ls -1 $doc/paper.*.thumb.jpg | sort -t. -k2,2n \
  | while read t; do
-    read w h < <(image_wh <$t)
+    printf ',%s' "$(json_contents $t)"
    printf ',{"mime":"image\/jpeg","data":%s,"width":%d,"height":%d}' \
      "$(base64 --wrap=0 "$t" | json_string)" $w $h
  done | sed 's/^.//'
  printf ']'
  ;;
 D|M)
-  if [ -f $doc/doc.pdf ]; then
+  if [ -z "$page" ]; then
-    p=$doc/doc.pdf
+    json_doc $doc
-    if [ -n "$pdfasjpg" ]; then
+  elif [ -f $doc/doc.pdf ]; then
-      read w h < <(pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $p | image_wh)
+    json_contents $doc/doc.pdf $page ${mode/D}
      mime='image/jpeg'
      cmd=(pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $p)
    else
      w=0; h=0; mime='application/pdf'
      cmd=(cat $p)
    fi
  else
-    p=$doc/paper.$page.jpg
+    json_contents $doc/paper.$page.jpg '' ${mode/D}
    read w h < <(image_wh <$p)
    mime='image/jpeg'
    cmd=(cat $p)
  fi
  if [ $mode == D ]; then
    printf '{"mime":%s,"data":%s,"width":%d,"height":%d}' \
      "$(json_string <<<"$mime")" "$("${cmd[@]}" | base64 --wrap=0 | json_string)" $w $h
  else
    printf '{"mime":%s,"width":%d,"height":%d}' "$(json_string <<<"$mime")" $w $h
  fi
  ;;
 R)