From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gentoo-commits+bounces-1077092-garchives=archives.gentoo.org@lists.gentoo.org>
Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by finch.gentoo.org (Postfix) with ESMTPS id 7269D138334
	for <garchives@archives.gentoo.org>; Wed, 13 Mar 2019 18:40:21 +0000 (UTC)
Received: from pigeon.gentoo.org (localhost [127.0.0.1])
	by pigeon.gentoo.org (Postfix) with SMTP id 89B10E0953;
	Wed, 13 Mar 2019 18:40:20 +0000 (UTC)
Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183])
	(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits))
	(No client certificate requested)
	by pigeon.gentoo.org (Postfix) with ESMTPS id 606FDE0953
	for <gentoo-commits@lists.gentoo.org>; Wed, 13 Mar 2019 18:40:20 +0000 (UTC)
Received: from oystercatcher.gentoo.org (unknown [IPv6:2a01:4f8:202:4333:225:90ff:fed9:fc84])
	(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits))
	(No client certificate requested)
	by smtp.gentoo.org (Postfix) with ESMTPS id A488D335D0D
	for <gentoo-commits@lists.gentoo.org>; Wed, 13 Mar 2019 18:40:18 +0000 (UTC)
Received: from localhost.localdomain (localhost [IPv6:::1])
	by oystercatcher.gentoo.org (Postfix) with ESMTP id A3E2D542
	for <gentoo-commits@lists.gentoo.org>; Wed, 13 Mar 2019 18:40:09 +0000 (UTC)
From: "Ulrich Müller" <ulm@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Content-Transfer-Encoding: 8bit
Content-type: text/plain; charset=UTF-8
Reply-To: gentoo-dev@lists.gentoo.org, "Ulrich Müller" <ulm@gentoo.org>
Message-ID: <1552339788.3db08691f11a7e6e823120073b11bd578acec57e.ulm@gentoo>
Subject: [gentoo-commits] proj/pms:master commit in: /
X-VCS-Repository: proj/pms
X-VCS-Files: Makefile
X-VCS-Directories: /
X-VCS-Committer: ulm
X-VCS-Committer-Name: Ulrich Müller
X-VCS-Revision: 3db08691f11a7e6e823120073b11bd578acec57e
X-VCS-Branch: master
Date: Wed, 13 Mar 2019 18:40:09 +0000 (UTC)
Precedence: bulk
List-Post: <mailto:gentoo-commits@lists.gentoo.org>
List-Help: <mailto:gentoo-commits+help@lists.gentoo.org>
List-Unsubscribe: <mailto:gentoo-commits+unsubscribe@lists.gentoo.org>
List-Subscribe: <mailto:gentoo-commits+subscribe@lists.gentoo.org>
List-Id: Gentoo Linux mail <gentoo-commits.gentoo.org>
X-BeenThere: gentoo-commits@lists.gentoo.org
X-Auto-Response-Suppress: DR, RN, NRN, OOF, AutoReply
X-Archives-Salt: 352e3c0b-f8e9-4b31-a31d-e52c16f00aed
X-Archives-Hash: afb76a85a83cf5f30444849e56ab4f3a

commit:     3db08691f11a7e6e823120073b11bd578acec57e
Author:     Ulrich Müller <ulm <AT> gentoo <DOT> org>
AuthorDate: Mon Mar 11 21:29:48 2019 +0000
Commit:     Ulrich Müller <ulm <AT> gentoo <DOT> org>
CommitDate: Mon Mar 11 21:29:48 2019 +0000
URL:        https://gitweb.gentoo.org/proj/pms.git/commit/?id=3db08691

Makefile: Change encoding of HTML file to UTF-8.

This will allow dropping the dependency on app-text/recode.

Replace ligatures in the tex4ht output with their component letters,
because ligatures would interfere with text search. Update the sed
expression for the list of tables workaround.

Signed-off-by: Ulrich Müller <ulm <AT> gentoo.org>

 Makefile | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index 5359342..612af4c 100644
--- a/Makefile
+++ b/Makefile
@@ -44,20 +44,20 @@ pms.dvi: $(LATEXFILES) pms.bbl $(COMMITINFO)
 pms.html: $(LATEXFILES) pms.bbl $(COMMITINFO)
 	set -e; sum=''; \
 	while true; do \
-	  mk4ht xhlatex pms xhtml,fn-in; \
+	  mk4ht xhlatex pms 'xhtml,fn-in,charset=utf-8' ' -cunihtf -utf8'; \
 	  oldsum=$${sum}; sum=$$(cksum $@); \
 	  test "$${sum}" != "$${oldsum}" || break; \
 	done
-	@# some www servers ignore meta tags, resulting in a wrong charset.
-	@# therefore recode the very few non-ascii characters
-	recode -d l1..h3 $@
-	@# declare encoding as utf-8, although it is pure ascii
-	LC_ALL=C sed -i -e '/<?xml\|<meta/s/iso-8859-1/utf-8/' $@
+	@# replace ligatures by their component letters
+	LC_ALL=C sed -i "$$(printf 's/\\xef\\xac\\x8%s/%s/g;' \
+	  0 ff 1 fi 2 fl 3 ffi 4 ffl)" $@
 	@# work around irregularity in how links to longtables are
 	@# formatted in the List of Tables
-	LC_ALL=C sed -i -e '/<span class="lotToc" >&#x00A0;/{N;N;s/\(&#x00A0;<a \nhref="[^"]\+">\)\([0-9A-Z.]\+\)[ \n]\+/\2\1/}' $@
+	LC_ALL=C sed -Ei '/<span class="lotToc" *>\B/{N;N;'\
+	's/([^>]*<a\s+href="[^"]+">)([0-9A-Z.]+)\s+/\2\1/;}' $@
 	@# remove redundant span elements
-	LC_ALL=C sed -i -e ':x;/<span\(\s\+[^>]*\)\?$$/{N;bx;};:y;s/\(<span\s\+[^>]*>\)\([^<]*\)<\/span>\1/\1\2/;ty' $@
+	LC_ALL=C sed -Ei ':x;/<span(\s+[^>]*)?$$/{N;bx;};'\
+	':y;s,(<span\s+[^>]*>)([^<]*)</span>\1,\1\2,;ty' $@
 
 pms.bbl: pms.bib $(LATEXFILES) $(COMMITINFO)
 	$(aux-clean)