#!/usr/bin/perl -w
# Eeva Ahonen, June 2003
#
#---------------------------------------------------------------------
# Marks lines written entirely in capital letters (Unicode) as titles
# by printing a <TITLE> line in front of them.
# Everything from a <PUBLICATION_INFO> line up to and including the
# matching </PUBLICATION_INFO> line is copied through untouched.
#
# Input is read from the files named on the command line, or from
# standard input when none are given; output goes to standard output.
# Every input line is written back exactly as it was read. Lines are
# expected to be UTF-8; a line that is not valid UTF-8 is classified
# from its raw bytes instead of stopping the run.
#----------------------------------------------------------------------

use strict;
use utf8;

my $in_publication_info = 0;    # true while between the two PUBLICATION_INFO tags

while ( my $line = <> ) {

    if ( $line =~ /<PUBLICATION_INFO>/ ) {
        # Entering the publication info. If the section is closed again
        # later on this very line, do not stay "inside": otherwise the
        # closing tag would never be seen and the rest of the input would
        # be passed through untagged.
        $in_publication_info =
            rindex( $line, '</PUBLICATION_INFO>' ) < rindex( $line, '<PUBLICATION_INFO>' )
            ? 1
            : 0;
        print $line;
        next;
    }

    if ( $line =~ /<\/PUBLICATION_INFO>/ ) {
        # Leaving the publication info.
        $in_publication_info = 0;
        print $line;
        next;
    }

    if ($in_publication_info) {
        # Inside the publication info: copy verbatim, never tag.
        print $line;
        next;
    }

    if ( is_capital_line($line) ) {
        # All capital letters -> tagged as a title.
        # NOTE(review): no closing tag is emitted, and because $line still
        # carries its own newline the title is followed by an empty line.
        # Kept exactly as before; confirm this is what downstream expects.
        print "<TITLE>\n" . $line . "\n";
    }
    else {
        # Normal text.
        print $line;
    }
}

# is_capital_line($line)
#
# Returns 1 if $line (with or without its trailing newline) consists of one
# or more uppercase letters and nothing else, 0 otherwise. Spaces, digits
# and punctuation make a line non-matching, as does an empty line.
#
# $line may be a raw UTF-8 byte string as returned by <>; it is decoded on
# a private copy so the caller's string is never modified.
sub is_capital_line {
    my ($line) = @_;

    my $text = $line;

    # Turn UTF-8 bytes into characters so that \p{IsUpper} sees e.g. one
    # "Ä" rather than two unrelated bytes. The return value is deliberately
    # ignored: on invalid UTF-8 the copy is simply tested as it stands.
    utf8::decode($text);

    return $text =~ /^\p{IsUpper}+$/ ? 1 : 0;
}