Skip to content
Snippets Groups Projects
Commit dcc9f4c2 authored by PavelBegunkov's avatar PavelBegunkov Committed by Роман Штейнберг
Browse files

Merge groups and eliminate dublicates, refs #107

parent 393b4111
Branches
Tags
No related merge requests found
......@@ -56,3 +56,67 @@ WHERE CONCAT( st1.LastName, st1.FirstName, st1.SecondName ) LIKE (
GROUP BY st1.LastName, rating_table.SubmoduleID
ORDER BY cnt desc
# студенты - дубликаты
select CONCAT(students.LastName,' ',students.FirstName,' ',students.SecondName) as name,
t.disciplineID,
t.id as logID,
t.StudentID
from (
SELECT max(logs_binds_students.id) as id, StudentID, DisciplineID, Type
from logs_binds_students
GROUP by logs_binds_students.StudentID, logs_binds_students.DisciplineID
) as t2
inner join logs_binds_students as t on t2.id = t.id
inner join disciplines on disciplines.id = t.DisciplineID
inner join students_groups on students_groups.StudentID = t.StudentID and students_groups.SemesterID = disciplines.SemesterID
inner join students on students.id = t.studentid
left join disciplines_groups on disciplines_groups.DisciplineID = t.DisciplineID and disciplines_groups.GroupID = students_groups.GroupID
left join disciplines_students on disciplines_students.StudentID = t.StudentID and disciplines_students.DisciplineID = t.DisciplineID
where t.Type = 'attach'
and disciplines_students.id is null
and disciplines_groups.id is null
-- список дубликатов и кол-во их оценок
select t1.name,
t1.recentSt,
t1.lastSt,
t2.cnt as beforeCnt,
t3.cnt as afterCnt,
tac1.Login IS NOT NULL,
tac2.Login IS NOT NULL
from (
select min(students.id) as recentSt, min(students.AccountID) as recentAcc,
max(students.id) as lastSt, max(students.AccountID) as lastAcc,
CONCAT( students.LastName, ' ', students.FirstName, ' ', students.SecondName) as name
from students
group by CONCAT( students.LastName, students.FirstName, students.SecondName)
having count(*) > 1
) as t1
left join accounts as tac1 on tac1.id = t1.recentAcc
left join accounts as tac2 on tac2.id = t1.lastAcc
left join (
select count(rating_table.Rate) as cnt,
CONCAT( students.LastName, ' ', students.FirstName, ' ', students.SecondName) as name
from students
left join rating_table on students.id = rating_table.StudentID
where rating_table.Rate IS NOT NULL
group by CONCAT( students.LastName, ' ', students.FirstName, ' ', students.SecondName)
) as t2 on STRCMP(t2.name, t1.name) = 0
left join (
select t4.name,
count(*) as cnt
from (
select count(rating_table.Rate) as cnt,
CONCAT( students.LastName, ' ', students.FirstName, ' ', students.SecondName) as name
from students
left join rating_table on students.id = rating_table.StudentID
where rating_table.Rate IS NOT NULL
group by CONCAT( students.LastName, ' ', students.FirstName, ' ', students.SecondName), rating_table.submoduleID
) as t4
group by t4.name
) as t3 on STRCMP(t3.name, t1.name) = 0
order by t1.recentSt
delimiter //
DROP FUNCTION IF EXISTS GetStudentAccountID//
CREATE FUNCTION `GetStudentAccountID` (
`pStudentID` INT
) RETURNS INT
BEGIN
RETURN (SELECT students.AccountID
from students
where students.id = pStudentID
limit 1
);
END//
DROP FUNCTION IF EXISTS SpoofGroups//
CREATE FUNCTION `SpoofGroups` (
`pDestStudentID` INT,
`pSrcStudentID` INT,
`pPriority` enum('none','src','dst')
) RETURNS INT
BEGIN
CASE pPriority
-- no priority
WHEN 'none' THEN
UPDATE students_groups
SET students_groups.StudentID = pDestStudentID
WHERE students_groups.StudentID = pSrcStudentID;
-- destination student priority
WHEN 'dst' THEN
UPDATE IGNORE students_groups
SET students_groups.StudentID = pDestStudentID
WHERE students_groups.StudentID = pSrcStudentID;
-- source student priority
WHEN 'src' THEN
INSERT INTO students_groups (StudentID, GroupID, SemesterID, Date, State)
SELECT pDestStudentID,
sg.GroupID, sg.SemesterID,
sg.Date, sg.State
FROM students_groups as sg
WHERE sg.StudentID = pSrcStudentID
ON DUPLICATE KEY UPDATE
students_groups.GroupID = VALUES(GroupID),
students_groups.Date = VALUES(Date),
students_groups.State = VALUES(State);
END CASE;
DELETE FROM students_groups
WHERE students_groups.StudentID = pSrcStudentID;
RETURN 0;
END //
DROP FUNCTION IF EXISTS SpoofRates//
CREATE FUNCTION `SpoofRates` (
`pDestStudentID` INT,
`pSrcStudentID` INT,
`pPriority` enum('none','src','dst')
) RETURNS INT
BEGIN
CASE pPriority
-- no priority
WHEN 'none' THEN
UPDATE rating_table
SET rating_table.StudentID = pDestStudentID
WHERE rating_table.StudentID = pSrcStudentID;
UPDATE exam_period_options
SET exam_period_options.StudentID = pDestStudentID
WHERE exam_period_options.StudentID = pSrcStudentID;
-- destination student priority
WHEN 'dst' THEN
UPDATE IGNORE rating_table
SET rating_table.StudentID = pDestStudentID
WHERE rating_table.StudentID = pSrcStudentID;
UPDATE IGNORE exam_period_options
SET exam_period_options.StudentID = pDestStudentID
WHERE exam_period_options.StudentID = pSrcStudentID;
-- source student priority
WHEN 'src' THEN
INSERT INTO rating_table (StudentID, TeacherID, SubmoduleID, Rate, Date)
SELECT pDestStudentID,
rt.TeacherID, rt.SubmoduleID,
rt.Rate, rt.Date
FROM rating_table as rt
WHERE rt.StudentID = pSrcStudentID
ON DUPLICATE KEY UPDATE
rating_table.Rate = VALUES(Rate),
rating_table.Date = VALUES(Date);
INSERT INTO exam_period_options (SubmoduleID, StudentID, Type)
SELECT epo.SubmoduleID,
pDestStudentID,
epo.Type
FROM exam_period_options as epo
WHERE epo.StudentID = pSrcStudentID
ON DUPLICATE KEY UPDATE
exam_period_options.Type = VALUES(Type);
END CASE;
DELETE FROM rating_table
WHERE rating_table.StudentID = pSrcStudentID;
DELETE FROM exam_period_options
WHERE exam_period_options.StudentID = pSrcStudentID;
RETURN 0;
END //
DROP FUNCTION IF EXISTS GlueAccount//
CREATE FUNCTION `GlueAccount` (
`pDstAcc` INT,
`pSrcAcc` INT,
-- determine who dominant in case of duplication
-- 'none' option on violation just return FALSE
`pPriority` enum('none','src','dst')
) RETURNS INT
NO SQL
BEGIN
DECLARE login VARCHAR(50);
DECLARE mail VARCHAR(64);
DECLARE pass VARCHAR(255);
SELECT accounts.Login, accounts.EMail, accounts.Password INTO login, mail, pass
FROM accounts
WHERE accounts.id = pSrcAcc LIMIT 1;
delete from accounts
where accounts.id = pSrcAcc;
update accounts
set
accounts.Login = COALESCE(accounts.Login, login),
accounts.EMail = COALESCE(accounts.EMail, mail),
accounts.Password = COALESCE(accounts.Password, pass)
where accounts.id = pDstAcc;
RETURN 0;
END //
/**
GlueStudents(pDestStudentID, pSrcStudentID, pPriority) - объединение дубликатов
pDestStudentID - ид студента, который останется
pSrcStudentID - ид студента, данные которого необходимо перенести в pSrcStudentID
pPriority - приоритет оценок и группы:
none - выдает ошибку
src - данные pSrcStudentID приоритетней
dst - данные pDestStudentID приоритетней
*/
DROP FUNCTION IF EXISTS GlueStudent//
CREATE FUNCTION `GlueStudent` (
`pDestStudentID` INT,
`pSrcStudentID` INT,
-- determine who dominant in case of duplication
-- 'none' option on violation just return FALSE
`pPriority` enum('none','src','dst')
) RETURNS INT
NO SQL
BEGIN
DECLARE vt INT DEFAULT 0;
-- DECLARE EXIT HANDLER FOR SQLEXCEPTION BEGIN
-- ROLLBACK;
-- SELECT -1;
-- END;
-- START TRANSACTION;
SET vt = SpoofRates(pDestStudentID, pSrcStudentID, pPriority);
SET vt = SpoofGroups(pDestStudentID, pSrcStudentID, pPriority);
BEGIN
DECLARE done INT DEFAULT FALSE;
DECLARE vDiscID INT;
DECLARE cur CURSOR FOR
SELECT disciplines_students.disciplineID
FROM disciplines_students
WHERE disciplines_students.StudentID = pSrcStudentID;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE;
OPEN cur;
read_loop: LOOP
FETCH cur INTO vDiscID;
IF done THEN
LEAVE read_loop;
END IF;
SET vt = BindStudent(1, vDiscID, pDestStudentID);
END LOOP;
CLOSE cur;
END;
DELETE FROM disciplines_students
WHERE disciplines_students.StudentID = pSrcStudentID;
UPDATE LOW_PRIORITY logs_rating
SET logs_rating.StudentID = pDestStudentID
WHERE logs_rating.StudentID = pSrcStudentID;
UPDATE LOW_PRIORITY logs_binds_students
SET logs_binds_students.StudentID = pDestStudentID
WHERE logs_binds_students.StudentID = pSrcStudentID;
SET vt = DeleteStudent(pSrcStudentID);
SET vt = GlueAccount(GetStudentAccountID(pDestStudentID), GetStudentAccountID(pSrcStudentID), pPriority);
-- COMMIT;
RETURN 0;
END //
DROP FUNCTION IF EXISTS DeleteStudent //
CREATE FUNCTION `DeleteStudent` (
`pStudentID` INT
) RETURNS INT
NO SQL
BEGIN
DECLARE vAccountID INT(11) DEFAULT -1;
SELECT AccountID INTO vAccountID
FROM students
WHERE students.ID = pStudentID
LIMIT 1;
DELETE
FROM disciplines_students
WHERE disciplines_students.StudentID = pStudentID;
DELETE
FROM rating_table
WHERE rating_table.StudentID = pStudentID;
DELETE
FROM students_groups
WHERE students_groups.StudentID = pStudentID;
DELETE
FROM exam_period_options
WHERE exam_period_options.StudentID = pStudentID;
DELETE
FROM logs_rating
WHERE logs_rating.StudentID = pStudentID;
DELETE
FROM logs_binds_students
WHERE logs_binds_students.StudentID = pStudentID;
DELETE
FROM students
WHERE students.ID = pStudentID;
DELETE
FROM recovery_tokens
WHERE recovery_tokens.AccountID = vAccountID;
DELETE
FROM logs_signin
WHERE logs_signin.AccountID = vAccountID;
RETURN 0;
END //
DROP PROCEDURE IF EXISTS MergeAllWeak //
CREATE PROCEDURE `MergeAllWeak` (
`pPriority` enum('none','src','dst')
) NO SQL
BEGIN
DECLARE done INT DEFAULT FALSE;
DECLARE vRes, stDst, stSrc INT default -1;
DECLARE cur1 CURSOR FOR
select min(students.id) as recentID,
max(students.id) as lastID
from (
select CONCAT( students.LastName, ' ', students.FirstName, ' ', students.SecondName) as name
from students
group by CONCAT( students.LastName, ' ', students.FirstName, ' ', students.SecondName)
having count(*) > 1
) as t1
left join students on STRCMP(CONCAT( students.LastName, ' ', students.FirstName, ' ', students.SecondName), t1.name) = 0
group by CONCAT( students.LastName, ' ', students.FirstName, ' ', students.SecondName);
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE;
OPEN cur1;
read_loop: LOOP
FETCH cur1 INTO stDst, stSrc;
IF done THEN
LEAVE read_loop;
END IF;
BEGIN
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION ROLLBACK;
START TRANSACTION;
SET vRes = GlueStudent(stDst, stSrc, pPriority);
COMMIT;
END;
END LOOP;
CLOSE cur1;
END //
DELIMITER ;
delimiter //
-- смержить группы
DROP FUNCTION IF EXISTS MergeGroups//
CREATE FUNCTION `MergeGroups` (
`srcGroup` INT,
`dstGroup` INT
) RETURNS INT
BEGIN
UPDATE IGNORE disciplines_groups
SET disciplines_groups.GroupID = dstGroup
WHERE disciplines_groups.GroupID = srcGroup;
UPDATE IGNORE students_groups
SET students_groups.GroupID = dstGroup
WHERE students_groups.GroupID = srcGroup;
UPDATE IGNORE logs_binds_groups
SET logs_binds_groups.GroupID = dstGroup
WHERE logs_binds_groups.GroupID = srcGroup;
delete from disciplines_groups
where disciplines_groups.GroupID = srcGroup;
delete from students_groups
where students_groups.GroupID = srcGroup;
delete from logs_binds_groups
where logs_binds_groups.GroupID = srcGroup;
delete from study_groups
where id = srcGroup;
return 0;
END//
DELIMITER ;
-- список потенциальных дубликатов
select count(*) as cntGroups,
GROUP_CONCAT(view_groups.groupid SEPARATOR ',') as groupsIds,
GROUP_CONCAT(view_groups.SpecName SEPARATOR ',') as groupsSpecs,
view_groups.*
from view_groups
group by gradeid, groupNum, FacultyID
having count(*) > 1
-- смерджить список потенциальных дубликатов
drop table if exists tGroupMergeTable;
CREATE TEMPORARY TABLE tGroupMergeTable as (
select view_groups.groupid as src,
BaseGroups.dst as dst
from (
select min(groupid) as dst, gradeid, groupNum, FacultyID
from view_groups
group by gradeid, groupNum, FacultyID
having count(*) > 1
) as BaseGroups
left join view_groups on view_groups.gradeid = BaseGroups.gradeid and
view_groups.groupNum = BaseGroups.groupNum and
view_groups.FacultyID = BaseGroups.FacultyID and
view_groups.groupid != BaseGroups.dst
);
select *
from tGroupMergeTable;
select MergeGroups(src, dst)
from tGroupMergeTable;
-- удаление специализаций, к которым не привязана ни одна группа
delete from specializations
where id not in (
select study_groups.SpecializationID from study_groups
)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment