#!/usr/bin/env perl

# This is the script formerly known as loadDB2, and then known as NPL-update.

# It is used to update
# the database when it comes to the WeBWorK Open Problem Library (OPL).
# This should be run after doing a git clone or pull for the OPL
# files.

# In order for this script to work:
#   1) The OPL downloaded to your machine (the .pg files)
#   2) Configuration for the OPL in site.conf needs to be
#      done (basically just setting the path to the OPL files).

#use strict;
use File::Find;
use File::Find::Rule;
use File::Basename;
use Cwd;
use DBI;

 #(maximum varchar length is 255 for mysql version < 5.0.3.
 #You can increase path length to  4096 for mysql > 5.0.3)

# Taxonomy global variables
# Make a hash of hashes of hashes to record what is legal
# Also create list for json file
my $taxo={};
#my $taxsubs = [];

### Data for creating the database tables

my %OPLtables = (
 dbsubject => 'OPL_DBsubject',
 dbchapter => 'OPL_DBchapter',
 dbsection => 'OPL_DBsection',
 author => 'OPL_author',
 path => 'OPL_path',
 pgfile => 'OPL_pgfile',
 keyword => 'OPL_keyword',
 pgfile_keyword => 'OPL_pgfile_keyword',
 textbook => 'OPL_textbook',
 chapter => 'OPL_chapter',
 section => 'OPL_section',
 problem => 'OPL_problem',
 morelt => 'OPL_morelt',
 pgfile_problem => 'OPL_pgfile_problem',
);


my %NPLtables = (
 dbsubject => 'NPL-DBsubject',
 dbchapter => 'NPL-DBchapter',
 dbsection => 'NPL-DBsection',
 author => 'NPL-author',
 path => 'NPL-path',
 pgfile => 'NPL-pgfile',
 keyword => 'NPL-keyword',
 pgfile_keyword => 'NPL-pgfile-keyword',
 textbook => 'NPL-textbook',
 chapter => 'NPL-chapter',
 section => 'NPL-section',
 problem => 'NPL-problem',
 morelt => 'NPL-morelt',
 pgfile_problem => 'NPL-pgfile-problem',
);

BEGIN {
	use Mojo::File qw(curfile);
	use Env qw(WEBWORK_ROOT);

	$WEBWORK_ROOT = curfile->dirname->dirname;
}

# Get database connection
use lib "$ENV{WEBWORK_ROOT}/lib";
use lib "$ENV{WEBWORK_ROOT}/bin";
use WeBWorK::CourseEnvironment;
use WeBWorK::Utils::Tags;
use OPLUtils qw/build_library_directory_tree build_library_subject_tree build_library_textbook_tree writeJSONtoFile/;

my $ce = WeBWorK::CourseEnvironment->new({ webwork_dir => $ENV{WEBWORK_ROOT} });

# decide whether the mysql installation can handle
# utf8mb4 and that should be used for the OPL

my $ENABLE_UTF8MB4 = ($ce->{ENABLE_UTF8MB4})?1:0;
print  "using utf8mb4 \n\n" if $ENABLE_UTF8MB4;

# The DBD::MariaDB driver should not get the
#    mysql_enable_utf8mb4 or mysql_enable_utf8 settings,
# but DBD::mysql should.
my %utf8_parameters = ();

if ( $ce->{database_driver} =~ /^mysql$/i ) {
	# Only needed for older DBI:mysql driver
	if ( $ENABLE_UTF8MB4 ) {
		$utf8_parameters{mysql_enable_utf8mb4} = 1;
	} else {
		$utf8_parameters{mysql_enable_utf8} = 1;
	}
}

my $dbh = DBI->connect(
	$ce->{problemLibrary_db}->{dbsource},
	$ce->{problemLibrary_db}->{user},
	$ce->{problemLibrary_db}->{passwd},
	{
		PrintError => 0,
		RaiseError => 1,
		%utf8_parameters,
	},
);

my $character_set='';
$character_set=($ENABLE_UTF8MB4)?"utf8mb4":"utf8";
$dbh->prepare("SET NAMES '$character_set'")->execute();

print "using character set $character_set to build OPL database\n";

my $libraryRoot = $ce->{problemLibrary}{root};
my $contribRoot = $ce->{contribLibrary}{root};
$libraryRoot =~ s|/+$||;
$contribRoot =~ s|/+$||;
print "using libraryRoot $libraryRoot\n";
print "using contribRoot $contribRoot\n";
print "WEBWORK_ROOT $ENV{WEBWORK_ROOT}\n";
my $libraryVersion = $ce->{problemLibrary}->{version};
my $db_storage_engine = $ce->{problemLibrary_db}->{storage_engine};

my $verbose = 0;
my $cnt2 = 0;
# Can force library version
$libraryVersion = $ARGV[0] if(@ARGV);

# auto flush printing
my $old_fh = select(STDOUT);
$| = 1;
select($old_fh);

sub dbug {
	my $msg = shift;
	my $insignificance = shift || 2;
	print $msg if($verbose>=$insignificance);
}

##Figure out which set of tables to use

my %tables;
if($libraryVersion eq '2.5') {
	%tables = %OPLtables;
	my $lib = 'OPL';
	warn "Library version is $libraryVersion; using OPLtables!\n";
} else {
	%tables = %NPLtables;
	my $lib = 'NPL';
	print "Library version is $libraryVersion; NPLtables! \n";
}

@create_tables = (
[$tables{dbsubject}, '
	DBsubject_id int(15) NOT NULL auto_increment,
	name varchar(245) NOT NULL,
	KEY DBsubject (name),
	PRIMARY KEY (DBsubject_id)
'],
[$tables{dbchapter}, '
	DBchapter_id int(15) NOT NULL auto_increment,
	name varchar(245) NOT NULL,
	DBsubject_id int(15) DEFAULT 0 NOT NULL,
	KEY DBchapter (name),
	KEY (DBsubject_id),
	PRIMARY KEY (DBchapter_id)
'],
[$tables{dbsection}, '
	DBsection_id int(15) NOT NULL auto_increment,
	name varchar(245) NOT NULL,
	DBchapter_id int(15) DEFAULT 0 NOT NULL,
	KEY DBsection (name),
	KEY (DBchapter_id),
	PRIMARY KEY (DBsection_id)
'],
[$tables{author}, '
	author_id int (15) NOT NULL auto_increment,
	institution tinyblob,
	lastname varchar (255) NOT NULL,
	firstname varchar (255) NOT NULL,
	email varchar (255),
	KEY author (lastname(100), firstname(100)),
	PRIMARY KEY (author_id)
'],
[$tables{path}, '
	path_id int(15) NOT NULL auto_increment,
	path varchar(245) NOT NULL,
	machine varchar(255),
	user varchar(255),
	KEY (path),
	PRIMARY KEY (path_id)
'],
[$tables{pgfile}, '
	pgfile_id int(15) NOT NULL auto_increment,
	DBsection_id int(15) NOT NULL,
	author_id int(15),
	institution tinyblob,
	libraryroot varchar(255) NOT NULL,
	path_id int(15) NOT NULL,
	filename varchar(255) NOT NULL,
	morelt_id int(127) DEFAULT 0 NOT NULL,
	level int(15),
	language varchar(255),
	static TINYINT,
	MO TINYINT,
	PRIMARY KEY (pgfile_id)
'],
[$tables{keyword}, '
	keyword_id int(15) NOT NULL auto_increment,
	keyword varchar(245) NOT NULL,
	KEY (keyword),
	PRIMARY KEY (keyword_id)
'],
[$tables{pgfile_keyword}, '
	pgfile_id int(15) DEFAULT 0 NOT NULL,
	keyword_id int(15) DEFAULT 0 NOT NULL,
	KEY pgfile_keyword (keyword_id, pgfile_id),
	KEY pgfile (pgfile_id)
'],
[$tables{textbook}, '
	textbook_id int (15) NOT NULL auto_increment,
	title varchar (255) NOT NULL,
	edition int (15) DEFAULT 0 NOT NULL,
	author varchar (255) NOT NULL,
	publisher varchar (255),
	isbn char (15),
	pubdate varchar (255),
	PRIMARY KEY (textbook_id)
'],
[$tables{chapter}, '
	chapter_id int (15) NOT NULL auto_increment,
	textbook_id int (15),
	number int(3),
	name varchar(245) NOT NULL,
	page int(4),
	KEY (textbook_id, name),
	KEY (number),
	PRIMARY KEY (chapter_id)
'],
[$tables{section}, '
	section_id int(15) NOT NULL auto_increment,
	chapter_id int (15),
	number int(3),
	name varchar(245) NOT NULL,
	page int(4),
	KEY (chapter_id, name),
	KEY (number),
	PRIMARY KEY section (section_id)
'],
[$tables{problem}, '
	problem_id int(15) NOT NULL auto_increment,
	section_id int(15),
	number int(4) NOT NULL,
	page int(4),
	#KEY (page, number),
	KEY (section_id),
	PRIMARY KEY (problem_id)
'],
[$tables{morelt}, '
	morelt_id int(15) NOT NULL auto_increment,
	name varchar(245) NOT NULL,
	DBsection_id int(15),
	leader int(15), # pgfile_id of the MLT leader
	KEY (name),
	PRIMARY KEY (morelt_id)
'],
[$tables{pgfile_problem}, '
	pgfile_id int(15) DEFAULT 0 NOT NULL,
	problem_id int(15) DEFAULT 0 NOT NULL,
	PRIMARY KEY (pgfile_id, problem_id)
']);

### End of database data

## Resetting the database tables.
#  First take care of tables which are no longer used

$dbh->do("DROP TABLE IF EXISTS `NPL-institution`");
$dbh->do("DROP TABLE IF EXISTS `NPL-pgfile-institution`");

for my $tableinfo (@create_tables) {
	my $tabname = $tableinfo->[0];
	my $tabinit = $tableinfo->[1];
	my $query = "DROP TABLE IF EXISTS `$tabname`";
	$dbh->do($query);
	$query = "CREATE TABLE `$tabname` ( $tabinit ) ENGINE=$db_storage_engine CHARACTER SET $character_set";
	$dbh->do($query);
	if($lib eq 'OPL') {
		$old_tabname = $tabname;
		$old_tabname =~ s/OPL/NPL/;
		$old_tabname =~ s/_/-/g;
		$query = "DROP TABLE IF EXISTS `$old_tabname`";
		$dbh -> do($query);
	}
}


print "Mysql database reinitialized.\n";

# From pgfile
## DBchapter('Limits and Derivatives')
## DBsection('Calculating Limits using the Limit Laws')
## Date('6/3/2002')
## Author('Tangan Gao')
## Institution('csulb')
## TitleText1('Calculus Early Transcendentals')
## EditionText1('4')
## AuthorText1('Stewart')
## Section1('2.3')
## Problem1('7')

# Get an id, and add entry to the database if needed
sub safe_get_id {
	my $tablename = shift;
	my $idname = shift;
	my $whereclause = shift;
	my $wherevalues = shift;
	my $addifnew = shift;
	my @insertvalues = @_;
#print "\nCalled with $tablename, $idname, $whereclause, [".join(',', @$wherevalues)."], (".join(',', @insertvalues).")\n";
	for my $j (0..$#insertvalues) {
		$insertvalues[$j] =~ s/"/\\\"/g;
	}

	my $query = "SELECT $idname FROM `$tablename` ".$whereclause;
	my $sth = $dbh->prepare($query);
	$sth->execute(@$wherevalues);
	my $idvalue, @row;
	unless(@row = $sth->fetchrow_array()) {
		return 0 unless $addifnew;
		for my $j (0..$#insertvalues) {
			#print "Looking at ".$insertvalues[$j]."\n";
			if ($insertvalues[$j] ne "") {
				$insertvalues[$j] = '"'.$insertvalues[$j].'"';
			} else {
				$insertvalues[$j] = NULL;
			}
		}
		$dbh->do("INSERT INTO `$tablename` VALUES(". join(',',@insertvalues) .")");
		dbug "INSERT INTO $tablename VALUES( ".join(',',@insertvalues).")\n";
		$sth = $dbh->prepare($query);
		$sth->execute(@$wherevalues);
		@row = $sth->fetchrow_array();
	}
	$idvalue = $row[0];
	return($idvalue);
}

sub isvalid {
	my $tags = shift;
	if(not defined $taxo->{$tags->{DBsubject}}) {
		print "\nInvalid subject ".$tags->{DBsubject}."\n";
		return 0;
	}
	if(not ($tags->{DBchapter} eq 'Misc.') and not defined $taxo->{$tags->{DBsubject}}->{$tags->{DBchapter}}) {
		print "\nInvalid chapter ".$tags->{DBchapter}."\n";
		return 0;
	}
	if(not ($tags->{DBsection} eq 'Misc.') and not defined $taxo->{$tags->{DBsubject}}->{$tags->{DBchapter}}->{$tags->{DBsection}}) {
		print "\nInvalid section ".$tags->{DBsection}."\n";
		return 0;
	}
	return 1;
}

#### First read in textbook information

if(open(IN, '<:encoding(UTF-8)', "$libraryRoot/Textbooks")) {
	print "Reading in textbook data from Textbooks in the library $libraryRoot.\n";
	my %textinfo = ( TitleText => '', EditionText =>'', AuthorText=>'');
	my $bookid = undef;
	while (my $line = <IN>) {
		$line =~ s|#*$||;
		if($line =~ /^\s*(.*?)\s*>>>\s*(.*?)\s*$/) { # Should have chapter or section information
			my $chapsec = $1;
			my $title = $2;
			if($chapsec=~ /(\d+)\.(\d+)/) { # We have a section
				if(defined($bookid)) {
					my $query = "SELECT chapter_id FROM `$tables{chapter}` WHERE textbook_id = \"$bookid\" AND number = \"$1\"";
					my $chapid = $dbh->selectrow_array($query);
					if(defined($chapid)) {
						my $sectid = safe_get_id($tables{section}, 'section_id',
							qq(WHERE chapter_id = ? and name = ?), [$chapid, $title], 1, "", $chapid, $2, $title, "");
					} else {
						print "Cannot enter section $chapsec because textbook information is missing the chapter entry\n";
					}
				} else {
					print "Cannot enter section $chapsec because textbook information is incomplete\n";
				}
			} else { # We have a chapter entry
				if(defined($bookid)) {
					my $chapid = safe_get_id($tables{chapter}, 'chapter_id',
						qq(WHERE textbook_id = ? AND number = ?), [$bookid, $chapsec], 1, "", $bookid, $chapsec, $title, "");

						# Add dummy section entry for problems tagged to the chapter
						# without a section
						$query = "SELECT section_id FROM `$tables{section}` WHERE chapter_id = \"$chapid\" AND number = -1";
						my $sectid = $dbh->selectrow_array($query);
						if (!defined($sectid)) {
							$dbh->do("INSERT INTO `$tables{section}`
						VALUES(
							NULL,
							\"$chapid\",
							\"-1\",
							\"\",
							NULL
						)"
								);
						dbug "INSERT INTO section VALUES(\"\", \"$chapid\", \"-1\", \"\", \"\" )\n";
						}
				} else {
					print "Cannot enter chapter $chapsec because textbook information is incomplete\n";
				}
			}
		} elsif($line =~ /^\s*(TitleText|EditionText|AuthorText)\(\s*'(.*?)'\s*\)/) {
			# Textbook information, maybe new
			my $type = $1;
			if(defined($textinfo{$type})) { # signals new text
				%textinfo = ( TitleText => undef,
							  EditionText =>undef,
							  AuthorText=> undef);
				$textinfo{$type} = $2;
				$bookid = undef;
			} else {
				$textinfo{$type} = $2;
				if(defined($textinfo{TitleText}) and
				   defined($textinfo{AuthorText}) and
				   defined($textinfo{EditionText})) {
					my $query = "SELECT textbook_id FROM `$tables{textbook}` WHERE title = \"$textinfo{TitleText}\" AND edition = \"$textinfo{EditionText}\" AND author=\"$textinfo{AuthorText}\"";
					$bookid = $dbh->selectrow_array($query);
					if (!defined($bookid)) {
						$dbh->do("INSERT INTO `$tables{textbook}`
					VALUES(
						NULL,
						\"$textinfo{TitleText}\",
						\"$textinfo{EditionText}\",
						\"$textinfo{AuthorText}\",
						NULL,
						NULL,
						NULL
					)"
							);
						dbug "INSERT INTO textbook VALUES( \"\", \"$textinfo{TitleText}\", \"$textinfo{EditionText}\", \"$textinfo{AuthorText}\", \"\", \"\", \"\" )\n";
						$bookid = $dbh->selectrow_array($query);
					}
				}
			}
		}
	}
	close(IN);
} else {
	print "Textbooks file was not found in library $libraryRoot. If the path to the problem library doesn't seem
	correct, make modifications in webwork2/conf/site.conf (\$problemLibrary{root}).  If that is correct then
	updating from git should download the Textbooks file.\n";
}
#### End of textbooks

#### Next read in the taxonomy
my $clsep = '<<<';
my $clinner = '__';
my @cllist = ();
# Record full taxonomy for tagging menus (does not include cross-lists)
my $tagtaxo = [];
my ($chaplist, $seclist) = ([],[]);

my $canopenfile = 0;
if(open(IN, '<:encoding(UTF-8)', "$libraryRoot/Taxonomy2")) {
	print "Reading in OPL taxonomy from Taxonomy2 in the library $libraryRoot.\n";
	$canopenfile = 1;
} elsif(open(IN, '<:encoding(UTF-8)', "$libraryRoot/Taxonomy")) {
	print "Reading in OPL taxonomy from Taxonomy in the library $libraryRoot.\n";
	$canopenfile = 1;
} else {
	print "Taxonomy file was not found in library $libraryRoot. If the path to the problem library doesn't seem
	correct, make modifications in webwork2/conf/site.conf (\$problemLibrary{root}).  If that is correct then
	updating from git should download the Taxonomy file.\n";
}

# Taxonomy is a subset of Taxonomy2, so we can use the same code either way
if($canopenfile) {
	my ($cursub,$curchap); # these are strings
	my ($subj, $chap, $sect); # these are indeces
	while(my $line = <IN>) {
		$line =~ /^(\t*)/;
		my $count = length($1);
		my $oktag = 1;
		chomp($line);
		if($line =~ m/$clsep/) {
			$oktag = 0;
			my @cross = split $clsep, $line;
			@cross = map(trim($_), @cross);
			if(scalar(@cross) > 1) {
				push @cllist, [join($clinner, ($cursub,$curchap,$cross[0])) ,$cross[1]];
			}
			$line = $cross[0];
		}
		$line = trim($line);

		# We put the line in the database in all cases
		# but crosslists are not put in the heierarchy of legal tags
		# instead they go in a list of crosslists to deal with after
		# the full taxonomy is read in
		if($count == 0) { #DBsubject
			$cursub = $line;
			if($oktag) {
				$taxo->{$line} = {};
				($chaplist, $seclist) = ([],[]);
				push @{$tagtaxo}, {name=>$line, subfields=>$chaplist};
			}
			$subj = safe_get_id($tables{dbsubject}, 'DBsubject_id',
				qq(WHERE name = ?), [$line], 1, "", $line);
		} elsif($count == 1) { #DBchapter
			if($oktag) {
				$taxo->{$cursub}->{$line} = {};
				$seclist=[];
				push @{$chaplist}, {name=>$line, subfields=>$seclist};
			}
			$curchap = $line;
			$chap = safe_get_id($tables{dbchapter}, 'DBchapter_id',
				qq(WHERE name = ? and DBsubject_id = ?), [$line, $subj], 1, "", $line, $subj);
		} else { #DBsection
			if($oktag) {
				$taxo->{$cursub}->{$curchap}->{$line} = [];
				push @{$seclist}, {name=>$line};
			}
			$sect = safe_get_id($tables{dbsection}, 'DBsection_id',
				qq(WHERE name = ? and DBchapter_id = ?), [$line, $chap], 1, "", $line, $chap);
		}
	}
	close(IN);
}
#### End of taxonomy/taxonomy2

#### Save the official taxonomy in json format
my $webwork_htdocs = $ce->{webworkDirs}{htdocs};
my $file = "$webwork_htdocs/DATA/tagging-taxonomy.json";

writeJSONtoFile($tagtaxo,$file);
print "Saved taxonomy to $file.\n";

#### Now deal with cross-listed sections
for my $clinfo (@cllist) {
	my @scs = split /$clinner/, $clinfo->[1];
	if(defined $taxo->{$scs[0]}->{$scs[1]}->{$scs[2]}) {
			push @{$taxo->{$scs[0]}->{$scs[1]}->{$scs[2]}}, $clinfo->[0];
	} else {
		print "Faulty cross-list: pointing to $scs[0] / $scs[1] / $scs[2]\n";
	}
}

print "Converting data from tagged pgfiles into mysql.\n";
print "Number of files processed:\n";

#### Now search for tagged problems
#recursive search for all pg files

File::Find::find( { wanted => \&pgfiles, follow_fast => 1 }, $libraryRoot );
File::Find::find( { wanted => \&pgfiles, follow_fast => 1 }, $contribRoot );

sub trim {
	my $str = shift;
	$str =~ s/^\s+//;
	$str =~ s/\s+$//;
	return $str;
}

sub kwtidy {
	my $s = shift;
	$s =~ s/\W//g;
	$s =~ s/_//g;
	$s = lc($s);
	return($s);
}

sub keywordcleaner {
	my $string = shift;
	my @spl1 = split /,/, $string;
	my @spl2 = map(kwtidy($_), @spl1);
	return(@spl2);
}

# Save on passing these values around
my %textinfo;

# Initialize, if needed more text-info information;
sub maybenewtext {
	my $textno = shift;
	return if defined($textinfo{$textno});
	# So, not defined yet
	$textinfo{$textno} = { title => '', author =>'', edition =>'',
						   section => '', chapter =>'', problems => [] };
}

# process each file returned by the find command.
sub pgfiles {
	my $name = $File::Find::name;
	my ($text, $edition, $textauthor, $textsection, $textproblem);
	%textinfo=();
	my @textproblems = (-1);
#print "\n$name";

	if ($name =~ /\.pg$/) {
		$cnt2++;
		printf("%6d", $cnt2) if(($cnt2 % 100) == 0);
		print "\n" if(($cnt2 % 1000) == 0);

		my $pgfile = basename($name);
		my $pgpath = dirname($name);
		$pgpath =~ s|^$libraryRoot|Library|;
		$pgpath =~ s|^$contribRoot|Contrib|;
		$pgpath =~ m|^([^/]*)/(.*)|;
		my ($pglib, $pgpath) = ($1, $2);

		my $tags = WeBWorK::Utils::Tags->new($name);

		if ($tags->istagged()) {
			# Fill in missing data with Misc. instead of blank
			print "\nNO SUBJECT $name\n" unless ($tags->{DBsubject} =~ /\S/);

			$tags->{DBchapter} = 'Misc.' unless $tags->{DBchapter} =~ /\S/;
			$tags->{DBsection} = 'Misc.' unless $tags->{DBsection} =~ /\S/;

			# DBsubject table

			if(isvalid($tags)) {
				my $DBsubject_id = safe_get_id($tables{dbsubject}, 'DBsubject_id',
					qq(WHERE BINARY name = ?), [$tags->{DBsubject}], 1, "", $tags->{DBsubject});
				if(not $DBsubject_id) {
					print "\nInvalid subject '$tags->{DBsubject}' in $name\n";
					return;
				}

				# DBchapter table

				$DBchapter_id = safe_get_id($tables{dbchapter}, 'DBchapter_id',
					qq(WHERE BINARY name = ? and DBsubject_id = ?), [$tags->{DBchapter}, $DBsubject_id], 1, "", $tags->{DBchapter}, $DBsubject_id);
				if(not $DBchapter_id) {
					print "\nInvalid chapter '$tags->{DBchapter}' in $name\n";
					return;
				}

				# DBsection table

				$aDBsection_id = safe_get_id($tables{dbsection}, 'DBsection_id',
					qq(WHERE BINARY name = ? and DBchapter_id = ?), [$tags->{DBsection}, $DBchapter_id], 1, "", $tags->{DBsection}, $DBchapter_id);
				if(not $aDBsection_id) {
					print "\nInvalid section '$tags->{DBsection}' in $name\n";
					return;
				}
			} else { # tags are not valid, error printed by validation part
				print "File $name\n";
				return;
			}

			my @DBsection_ids=($aDBsection_id);
			# Now add crosslisted section
			my @CL_array = @{$taxo->{$tags->{DBsubject}}->{$tags->{DBchapter}}->{$tags->{DBsection}}};
			for my $clsect (@CL_array) {
				my @np = split /$clinner/, $clsect;
				@np = map(trim($_), @np);
				my $new_dbsubj_id = safe_get_id($tables{dbsubject}, 'DBsubject_id',
					qq(WHERE name = ?), [$np[0]], 1, "", $np[0]);
				my $new_dbchap_id = safe_get_id($tables{dbchapter}, 'DBchapter_id',
					qq(WHERE name = ? and DBsubject_id = ?), [$np[1], $new_dbsubj_id], 1, "", $np[1], $new_dbsubj_id);
				my $new_dbsect_id = safe_get_id($tables{dbsection}, 'DBsection_id',
					qq(WHERE name = ? and DBchapter_id = ?), [$np[2], $new_dbchap_id], 1, "", $np[2], $new_dbchap_id);
				push @DBsection_ids, $new_dbsect_id;
			}

			# author table

			$tags->{Author} =~ /(.*?)\s(\w+)\s*$/;
			my $firstname = $1;
			my $lastname = $2;
			#remove leading and trailing spaces from firstname, which includes any middle name too.
			$firstname =~ s/^\s*//;
			$firstname =~ s/\s*$//;
			$lastname =~ s/^\s*//;
			$lastname =~ s/\s*$//;
			my $author_id = 0;
			if($lastname) {
				$author_id = safe_get_id($tables{author}, 'author_id',
					qq(WHERE lastname = ? AND firstname = ?), [$lastname, $firstname], 1, "", $tags->{Institution}, $lastname, $firstname,"");
			}

			# path table

			my $path_id = safe_get_id($tables{path}, 'path_id',
				qq(WHERE path = ?), [$pgpath], 1, "", $pgpath, "", "");

			# pgfile table -- set 4 defaults first

			# pgfile table -- set 4 defaults first
			my $level   = ($tags->{Level} =~ /\d/) ? $tags->{Level} : 0;
			my $lang    = $tags->{Language} || 'en';
			my $mathobj = $tags->{MO}       || 0;
			my $static  = $tags->{Static}   || 0;

			my @pgfile_ids = ();

			for my $DBsection_id (@DBsection_ids) {
				my $pgfile_id = safe_get_id($tables{pgfile}, 'pgfile_id',
					qq(WHERE filename = ? AND path_id = ? AND DBsection_id = ? AND libraryroot = ?),
					[$pgfile, $path_id, $DBsection_id, $pglib], 1, "", $DBsection_id, $author_id,
					$tags->{Institution}, $pglib, $path_id, $pgfile, 0, $level, $lang, $static, $mathobj);
				push @pgfile_ids, $pgfile_id;
			}

			# morelt table

			my $morelt_id;
			if($tags->{MLT}) {
				for my $DBsection_id (@DBsection_ids) {
					$morelt_id = safe_get_id($tables{morelt}, 'morelt_id',
						qq(WHERE name = ?), [$tags->{MLT}], 1, "", $tags->{MLT}, $DBsection_id, "");

					for my $pgfile_id (@pgfile_ids) {
						$dbh->do("UPDATE `$tables{pgfile}` SET
							morelt_id = \"$morelt_id\" WHERE pgfile_id = \"$pgfile_id\" ");

						dbug "UPDATE pgfile morelt_id for $pgfile_id to $morelt_id\n";
						if($tags->{MLTleader}) {
							$dbh->do("UPDATE `$tables{morelt}` SET
									leader = \"$pgfile_id\" WHERE morelt_id = \"$morelt_id\" ");
							dbug "UPDATE morelt leader for $morelt_id to $pgfile_id\n";
						}
					}
				}
			}

			# keyword table, and problem_keyword many-many table

			foreach my $keyword (@{$tags->{keywords}}) {
				$keyword =~ s/[\'\"]//g;
				$keyword = kwtidy($keyword);
                                # skip it if it is empty
                                next unless $keyword;
				my $keyword_id = safe_get_id($tables{keyword}, 'keyword_id',
					qq(WHERE keyword = ?), [$keyword], 1, "", $keyword);

				for my $pgfile_id (@pgfile_ids) {
					$query = "SELECT pgfile_id FROM `$tables{pgfile_keyword}` WHERE keyword_id = \"$keyword_id\" and pgfile_id=\"$pgfile_id\"";
					my $ok = $dbh->selectrow_array($query);
					if (!defined($ok)) {
						$dbh->do("INSERT INTO `$tables{pgfile_keyword}`
							VALUES(
								\"$pgfile_id\",
								\"$keyword_id\"
							)"
								);
						dbug "INSERT INTO pgfile_keyword VALUES( \"$pgfile_id\", \"$keyword_id\" )\n";
					}
				}
			}					#end foreach keyword

			# Textbook section
			# problem table contains textbook problems

			for my $texthashref (@{$tags->{textinfo}}) {

				# textbook table

				$text = $texthashref->{TitleText};
				$edition = $texthashref->{EditionText} || 0;
				$edition =~ s/[^\d\.]//g;
				$textauthor = $texthashref->{AuthorText};
				next unless($text and $textauthor);
				my $chapnum = $texthashref->{chapter} || -1;
				my $secnum = $texthashref->{section} || -1;
				$query = "SELECT textbook_id FROM `$tables{textbook}` WHERE title = \"$text\" AND edition = \"$edition\" AND author=\"$textauthor\"";
				my $textbook_id = $dbh->selectrow_array($query);
				if (!defined($textbook_id)) {
 				        # make sure edition is an integer
				        $edition = 0 unless $edition;
					$dbh->do("INSERT INTO `$tables{textbook}`
					VALUES(
						NULL,
						\"$text\",
						\"$edition\",
						\"$textauthor\",
						NULL,
						NULL,
						NULL
					)"
							);
					dbug "INSERT INTO textbook VALUES( \"\", \"$text\", \"$edition\", \"$textauthor\", \"\", \"\", \"\" )\n";
					dbug "\nLate add into $tables{textbook} \"$text\", \"$edition\", \"$textauthor\"\n", 1;
					$textbook_id = $dbh->selectrow_array($query);
				}

				# chapter weak table of textbook
				$query = "SELECT chapter_id FROM `$tables{chapter}` WHERE textbook_id = \"$textbook_id\" AND number = \"$chapnum\"";
				my $chapter_id = $dbh->selectrow_array($query);
				if (!defined($chapter_id) && defined($textbook_id)) {
					$dbh->do("INSERT INTO `$tables{chapter}`
					VALUES(
						NULL,
						\"$textbook_id\",
						\"".$chapnum."\",
						\"$tags->{DBchapter}\",
						NULL
					)"
							);
					dbug "\nLate add into $tables{chapter} \"$text\", \"$edition\", \"$textauthor\", $chapnum $tags->{chapter} from $name\n", 1;
					dbug "INSERT INTO chapter VALUES(\"\", \"$textbook_id\", \"".$chapnum."\", \"$tags->{DBchapter}\", \"\" )\n";
					$chapter_id = $dbh->selectrow_array($query);
				}

				# section weak table of textbook
				#
				$tags->{DBsection} = '' if ($secnum < 0);
				$query = "SELECT section_id FROM `$tables{section}` WHERE chapter_id = \"$chapter_id\" AND number = \"$secnum\"";
				my $section_id = $dbh->selectrow_array($query);
				if (!defined($section_id) && defined($chapter_id) && defined($textbook_id)) {
					$dbh->do("INSERT INTO `$tables{section}`
					VALUES(
						NULL,
						\"$chapter_id\",
						\"$secnum\",
						\"$tags->{DBsection}\",
						NULL
					)"
							);
					dbug "INSERT INTO section VALUES(\"\", \"$textbook_id\", \"$secnum\", \"$tags->{DBsection}\", \"\" )\n";
					dbug "\nLate add into $tables{section} \"$text\", \"$edition\", \"$textauthor\", $secnum $tags->{DBsection} from $name\n", 1;
					$section_id = $dbh->selectrow_array($query);
				}

				@textproblems = @{$texthashref->{problems}};
				if ($section_id) {
					for my $tp (@textproblems) {
						$query = "SELECT problem_id FROM `$tables{problem}` WHERE section_id = \"$section_id\" AND number = \"$tp\"";
						my $problem_id = $dbh->selectrow_array($query);
						if (!defined($problem_id)) {
							$dbh->do("INSERT INTO `$tables{problem}`
						VALUES(
							NULL,
							\"$section_id\",
							\"$tp\",
							NULL
						)"
									);
							dbug "INSERT INTO problem VALUES( \"\", \"$section_id\", \"$tp\", \"\" )\n";
							$problem_id = $dbh->selectrow_array($query);
						}

						# pgfile_problem table associates pgfiles with textbook problems
						for my $pgfile_id (@pgfile_ids) {
							$query = "SELECT problem_id FROM `$tables{pgfile_problem}` WHERE problem_id = \"$problem_id\" AND pgfile_id = \"$pgfile_id\"";
							my $pg_problem_id = $dbh->selectrow_array($query);
							if (!defined($pg_problem_id)) {
								$dbh->do("INSERT INTO `$tables{pgfile_problem}`
							VALUES(
								\"$pgfile_id\",
								\"$problem_id\"
							)"
										);
								dbug "INSERT INTO pgfile_problem VALUES( \"$pgfile_id\", \"$problem_id\" )\n";
							}
						}
					}
				}

				#reset tag vars, they may not match the next text/file
				$textauthor=""; $textsection="";
			}
		} else { # This file was not tagged
			# Message if not a pointer
			# print STDERR "File $name is not tagged\n" if not $tags->isplaceholder();
			;
		}
	}
}

print "\n\n";

# Now prune away DBsection, etc, which do not appear in any files
#%my $query = "SELECT chapter_id FROM `$tables{chapter}` WHERE textbook_id = \"$bookid\" AND number = \"$1\"";
#%my $chapid = $dbh->selectrow_array($query);

#select dbs.DBsection_id from OPL_DBsection dbs;
#select COUNT(*) from OPL_pgfile where DBsection_id=857;

my $dbsects = $dbh->selectall_arrayref("SELECT DBsection_id from `$tables{dbsection}`");
for my $sect (@{$dbsects}) {
	$sect = $sect->[0];
	my $srar = $dbh->selectall_arrayref("SELECT * FROM `$tables{pgfile}` WHERE DBsection_id=$sect");
	if(scalar(@{$srar})==0) {
		$dbh->do("DELETE FROM `$tables{dbsection}` WHERE DBsection_id=$sect");
	}
}

my $dbchaps = $dbh->selectall_arrayref("SELECT DBchapter_id from `$tables{dbchapter}`");
for my $chap (@{$dbchaps}) {
	$chap = $chap->[0];
	my $srar = $dbh->selectall_arrayref("SELECT * FROM `$tables{dbsection}` WHERE DBchapter_id=$chap");
	if(scalar(@{$srar})==0) {
		$dbh->do("DELETE FROM `$tables{dbchapter}` WHERE DBchapter_id=$chap");
	}
}

# Note: this used to build some JSON versions of the textbooks, subjects and directory trees
# that could be used in the library browswer. It's functionality is now
# in the updateOPLextras.pl script.

$dbh->disconnect;

if ($ce->{problemLibrary}{showLibraryLocalStats} ||
    $ce->{problemLibrary}{showLibraryGlobalStats}) {
  print "\nUpdating Library Statistics.\n";
  do $ENV{WEBWORK_ROOT}.'/bin/update-OPL-statistics.pl';

  print "\nLoading global statistics (if possible).\n";
  do $ENV{WEBWORK_ROOT}.'/bin/load-OPL-global-statistics.pl';
}

print "\nDone.\n";
