From df5559fe07c2db463e71b311672a0825bf43de2e Mon Sep 17 00:00:00 2001
From: kirill kirchak <kirchak@sfedu.ru>
Date: Tue, 7 May 2024 20:43:02 +0000
Subject: [PATCH] add API and changed compare logic

---
 clonus/methods/client.py            |  70 +++++++++++++++
 clonus/methods/counting_time.py     |  11 +++
 clonus/methods/fp_method_builder.py | 129 +++++++++++++++++++++-------
 clonus/models.py                    |   2 +
 clonus/serializers.py               |   9 ++
 clonus/views.py                     |  79 +++++++++++++++--
 requirements.txt                    |   4 +-
 7 files changed, 267 insertions(+), 37 deletions(-)
 create mode 100644 clonus/methods/client.py
 create mode 100644 clonus/methods/counting_time.py
 create mode 100644 clonus/serializers.py

diff --git a/clonus/methods/client.py b/clonus/methods/client.py
new file mode 100644
index 0000000..c277311
--- /dev/null
+++ b/clonus/methods/client.py
@@ -0,0 +1,70 @@
+from pathlib import Path
+import requests
+from datetime import datetime
+
+def get_files_for_compare(path) -> datetime:
+    """POST each pair of files in every group_by() group under *path* to the compare API.
+
+    g[0] of every group is sent as the template and each reply is appended to ``report.txt`` in
+    *path*. Returns 2024-05-07 plus the summed request time, or ``None`` if *path* is missing.
+    """
+    main_path = Path(path)
+    url = 'http://127.0.0.1:8000/api/v1/SummaryAPIView/'  # ?
+    if not main_path.exists():
+        return None
+    res_time = datetime(2024, 5, 7)  # arbitrary base date; only the accumulated offset matters
+    for g in group_by(list(main_path.rglob("*.py")), main_path) or []:
+        temp_file = g[0]
+        for k in range(1, len(g) - 1):
+            for j in range(k + 1, len(g)):
+                paths = {'absolute_path_to_file1': g[k], 'absolute_path_to_file2': g[j],
+                         'absolute_path_to_file_template': temp_file}
+                try:
+                    with open(g[k], 'r') as file1, open(g[j], 'r') as file2, open(temp_file, 'r') as file_template:
+                        start_time = datetime.now()
+                        response = requests.post(url, data={'gram_size': 8, 'window_size': 3}, files={
+                            'file1': file1, 'file2': file2, 'file_template': file_template})
+                        res_time += datetime.now() - start_time
+                        write_to_file_json(response.json(), main_path / 'report.txt', paths)
+                except IOError as e:
+                    print(f"IOError: {e}")  # was IOError.strerror (class attribute), which raised TypeError
+    return res_time
+
+
+def group_by(files, main_path):
+    """Group *files* by their path below the first directory level under *main_path*.
+
+    The key skips the path component directly below *main_path*, so files that
+    share the rest of their path end up in one group. Within a group, paths
+    that have a ``templates`` component are placed first (the relative order
+    of the others is kept). Returns a list of lists; empty input gives ``[]``
+    instead of the former ``None``.
+    """
+    ind = len(main_path.parts)
+    d = dict()
+    for file in files:
+        # Tuple key: joining the parts into one string could merge distinct paths.
+        d.setdefault(file.parts[ind + 1:], []).append(file)
+    # sorted() returns a new list (its result used to be dropped); False sorts first.
+    return [sorted(group, key=lambda x: 'templates' not in x.parts) for group in d.values()]
+
+
+def write_to_file_json(response, to_path, absolute_path):
+    """Append a text report for one comparison to the file *to_path*.
+
+    *response* is the decoded JSON reply (only its first value is used) and
+    *absolute_path* maps labels to the path objects of the compared files.
+    """
+    first, second = [set(v.parts) for v in absolute_path.values()][:2]
+    names = sorted(first ^ second)  # path parts that differ, in a stable order
+    js_info = list(response.values())[0]
+    with open(to_path, 'a+', encoding='utf-8') as report:  # explicit encoding: the text is non-ASCII
+        report.write(f'РЎСЂР°РІРЅРµРЅРёРµ СЂР°Р±РѕС‚ {names[0]} - {names[1]}\n')
+        report.write("РћС‚С‡РµС‚:\n")
+        for k, v in js_info.items():
+            if k == 'q1':  # list the compared file paths just before the q1 entry
+                report.writelines(f'{label}: {p} \n' for label, p in absolute_path.items())
+            report.write(f'{k}: {v}\n')
+
+        report.write(
+            "--------------------------------------------------------------------------------------------------\n")
diff --git a/clonus/methods/counting_time.py b/clonus/methods/counting_time.py
new file mode 100644
index 0000000..e372066
--- /dev/null
+++ b/clonus/methods/counting_time.py
@@ -0,0 +1,11 @@
+from datetime import datetime
+
+from clonus.methods.client import get_files_for_compare
+
+
+def time_compare():
+    """Run get_files_for_compare on the hard-coded example directory and print both timings."""
+    started = datetime.now()
+    method_time = get_files_for_compare("C:\\example")
+    print(f'Р’СЂРµРјСЏ СЃСЂР°РІРЅРµРЅРёСЏ РєР°С‚Р°Р»РѕРіР° СЃ С„Р°Р№Р»Р°РјРё: {datetime.now() - started}')
+    print(f'Р’СЂРµРјСЏ РІС‹РїРѕР»РЅРµРЅРёСЏ РјРµС‚РѕРґР° Р±РµР· СѓС‡РµС‚Р° РІСЂРµРјРµРЅРё Р·Р°С‚СЂР°С‡РµРЅРЅРѕРіРѕ РЅР° Р·Р°РїРёСЃСЊ РІ С„Р°Р№Р»(С‡РёСЃС‚РѕРµ РІСЂРµРјСЏ): {str(method_time)[10:]}')
diff --git a/clonus/methods/fp_method_builder.py b/clonus/methods/fp_method_builder.py
index 318dc2b..ae50367 100644
--- a/clonus/methods/fp_method_builder.py
+++ b/clonus/methods/fp_method_builder.py
@@ -5,16 +5,16 @@ from .fp_method import FpMethodResult
 
 
 class FingerprintMethodBuilder(MethodBuilder):
-    input_files: list                   # СЃРїРёСЃРѕРє РёР· 2 РёРјРµРЅ С„Р°Р№Р»РѕРІ РґР»СЏ РїСЂРѕРІРµСЂРєРё
-    pre_proc_out: list                  # РІС‹С…РѕРґРЅС‹Рµ РґР°РЅРЅС‹Рµ pre_processing_step
-    proc_out: list                      # РІС‹С…РѕРґРЅС‹Рµ РґР°РЅРЅС‹Рµ processing_step
-    post_proc_out: FpMethodResult     # РІС‹С…РѕРґРЅС‹Рµ РґР°РЅРЅС‹Рµ processing_step
-    output: list                        # СЂРµР·СѓР»СЊС‚Р°С‚ РІС‹РїРѕР»РЅРµРЅРёСЏ РјРµС‚РѕРґР°
-    gram_size: int                      # СЂР°Р·РјРµСЂ K-РіСЂР°РјС‹
-    hash_param: int                     # РїР°СЂР°РјРµС‚СЂ С…СЌС€-С„СѓРєРЅС†РёРё
-    window_size: int                    # СЂР°Р·РјРµСЂ РѕРєРЅР° (РґР»СЏ Р°Р»РіРѕСЂРёС‚РјР° winnowing)
-
-    def __init__(self, f_list: list, gram_size=8,window_size=3,hash_param=273) -> None:
+    input_files: list  # names of the files to check; processing_step treats the last one as the template
+    pre_proc_out: list  # output of pre_processing_step
+    proc_out: list  # output of processing_step
+    post_proc_out: FpMethodResult  # presumably the output of post_processing_step (original note said processing_step)
+    output: list  # result of running the method
+    gram_size: int  # K-gram size
+    hash_param: int  # hash function parameter
+    window_size: int  # window size (for the winnowing algorithm)
+
+    def __init__(self, f_list: list, gram_size=8, window_size=3, hash_param=273) -> None:
         super().__init__()
         self.input_files = f_list
         self.pre_proc_out = []
@@ -30,36 +30,96 @@ class FingerprintMethodBuilder(MethodBuilder):
             self.pre_proc_out.append(token_lst)
 
     def processing_step(self):
-        tokens_text = []        # СЃРїРёСЃРѕРє С‚РѕРєРµРЅРѕРІ РІ СЃС‚СЂРѕРєРѕРІРѕРј РїСЂРµРґСЃС‚Р°РІР»РµРЅРёРё
-        gh_list = []            # СЃРїРёСЃРѕРє Рљ-РіСЂР°Рј Рё С…СЌС€-Р·РЅР°С‡РµРЅРёР№ С‚РµРєСЃС‚РѕРІ
-        finger_prints = []      # fingerprints С‚РµРєСЃС‚РѕРІ
+        tokens_text = []  # token lists in string form, one entry per input file
+        gh_list = []  # [k-grams, hashes] of every non-template text
+        finger_prints = []  # fingerprints of every non-template text
+        prints_template = []  # fingerprints of the template (the last input file)
+        tmp_i_merged_points = []  # merged points of each file matched against the template only
+        # Fingerprints every input, then appends one result per non-template file and a final Simpson distance.
 
         if len(self.input_files) != len(self.pre_proc_out):
             raise AttributeError("Fingerprint method prepocessing error")
 
         for elem in self.pre_proc_out:
             tokens_text.append(self._to_text(elem))
-
-        for i in range(len(tokens_text)):
-            i_grams = self._get_k_grams_from_text(
-                tokens_text[i], self.gram_size, self.hash_param)
-            i_hashes = self._get_hashes_from_grams(i_grams)
-            i_finger_prints = self._winnow(i_hashes, self.window_size)
-
-            gh_list.append([i_grams, i_hashes])
-            finger_prints.append(i_finger_prints)
-
+        len_lst_token = len(tokens_text)  # index len_lst_token - 1 is the template text
         for i in range(len(tokens_text)):
+            if i == len_lst_token - 1:  # template: only its fingerprints are kept
+                grams_template = self._get_k_grams_from_text(tokens_text[len_lst_token - 1], self.gram_size,
+                                                             self.hash_param)
+                hashes_template = self._get_hashes_from_grams(grams_template)
+                prints_template = self._winnow(hashes_template, self.window_size)
+
+            else:  # regular input: keep grams, hashes and fingerprints
+                i_grams = self._get_k_grams_from_text(
+                    tokens_text[i], self.gram_size, self.hash_param)
+                i_hashes = self._get_hashes_from_grams(i_grams)
+                i_finger_prints = self._winnow(i_hashes, self.window_size)
+                gh_list.append([i_grams, i_hashes])
+                finger_prints.append(i_finger_prints)
+
+        # -------------------------------list of copy-pairs----------------------------------------
+        # The template is skipped here, hence len(tokens_text) - 1 iterations.
+        for i in range(len(tokens_text) - 1):
             i_points = self._get_points_lst(
                 finger_prints, self.pre_proc_out[i], gh_list[i][1], gh_list[i][0])
             i_merged_points = self._get_merged_points(i_points)
-            self.proc_out.append(i_merged_points)
+            # Same point search, but against this file's own fingerprints plus the template's.
+            tmp_i_points = self._get_points_lst(
+                [finger_prints[i], prints_template], self.pre_proc_out[i], gh_list[i][1], gh_list[i][0])
+            tmp_i_merged_points.append(self._get_merged_points(tmp_i_points))
+
+
+            res = self.intersection(i_merged_points, tmp_i_merged_points[i])  # presumably drops template matches
+            self.proc_out.append(res)
 
         self.proc_out.append(self._distance_simpson_lst(finger_prints))
 
     def post_processing_step(self) -> FpMethodResult:
         return FpMethodResult(self.proc_out[0], self.proc_out[1], self.proc_out[2])
 
+    def intersection(self, lst_i, lst_tmp_i):
+        """Return the spans of *lst_i* with the parts matched by template spans *lst_tmp_i* cut out.
+
+        Both arguments are lists of ``[start, end]`` pairs, scanned in list order
+        (assumed sorted by start -- TODO confirm against ``_get_merged_points``).
+        A template span lying inside a span removes that part and keeps the
+        uncovered head and/or tail; a span that ends before the template span
+        under inspection starts is kept whole. With no template spans a copy of
+        *lst_i* is returned.
+        """
+        if len(lst_tmp_i) == 0:
+            return list(lst_i)
+        new_lst = []
+        last_tmp_start = lst_tmp_i[len(lst_tmp_i) - 1][0]
+        for j in range(len(lst_i)):
+            lo, hi = lst_i[j][0], lst_i[j][1]
+            if lo > last_tmp_start:
+                # Past the last template start: keep this span and all later ones as they are.
+                new_lst.extend(lst_i[j:])
+                break
+            check = False  # True means "keep lst_i[j] unchanged"
+            for i in range(len(lst_tmp_i)):
+                t_lo, t_hi = lst_tmp_i[i][0], lst_tmp_i[i][1]
+                if t_lo > hi:
+                    check = True
+                    break
+                if lo <= t_lo and t_hi <= hi:
+                    # Template span inside this span: emit only what it leaves uncovered.
+                    check = False
+                    if lo < t_lo:
+                        new_lst.append([lo, t_lo - 1])
+                    if t_hi < hi:
+                        new_lst.append([t_hi + 1, hi])
+                    break
+                if t_lo < lo and j <= i:
+                    check = True
+            if check:
+                new_lst.append(lst_i[j])
+        # Fix: the old code ended with new_lst.extend(lst_i[start:]) where start stayed 0
+        # unless the early stop fired, so every span was appended a second time.
+        return new_lst
+
     def _to_list(self, arr):
         """Р’РѕР·РІСЂР°С‰Р°РµС‚ СЃР°РјРё С‚РѕРєРµРЅС‹ РІ СЃРїРёСЃРєРµ"""
         return [str(x[0]) for x in arr]
@@ -98,7 +158,7 @@ class FingerprintMethodBuilder(MethodBuilder):
             elif tokens[i][0] in pygments.token.Name.Function:
                 res.append(('F', source_cnt, product_cnt))
                 product_cnt += 1
-            elif i!= 0 and tokens[i-1][0] == pygments.token.Text.Whitespace: 
+            elif i != 0 and tokens[i - 1][0] == pygments.token.Text.Whitespace:
                 pass
             elif not (tokens[i][0] == pygments.token.Text or tokens[i][0] in pygments.token.Comment):
                 res.append((tokens[i][1], source_cnt, product_cnt))
@@ -133,9 +193,9 @@ class FingerprintMethodBuilder(MethodBuilder):
     def _get_k_grams_from_text(self, text, k=25, q=31):
         """Р Р°Р·РґРµР»РёС‚СЊ С‚РµРєСЃС‚ РЅР° K-РіСЂР°РјРјС‹"""
         grams = []
-        for i in range(0, len(text)-k+1):
-            hash_gram = self._get_hash_from_gram(text[i:i+k], q)
-            gram = Gram(text[i:i+k], hash_gram, i, i+k)
+        for i in range(0, len(text) - k + 1):
+            hash_gram = self._get_hash_from_gram(text[i:i + k], q)
+            gram = Gram(text[i:i + k], hash_gram, i, i + k)
             grams.append(gram)
         return grams
 
@@ -161,10 +221,10 @@ class FingerprintMethodBuilder(MethodBuilder):
         n = len(hashes)
         prints = []
         windows = []
-        prev_min = 0
+        prev_min = -1
         current_min = 0
         for i in range(n - w):
-            window = hashes[i:i+w]
+            window = hashes[i:i + w]
             windows.append(window)
             current_min = i + self._min_index(window)
             if current_min != prev_min:
@@ -236,3 +296,12 @@ class Gram:
         self.hash = hash_gram
         self.start_pos = start_pos
         self.end_pos = end_pos
+
+
+class TemplateCompare:
+    """Plain holder for a template's path, token, hashes, grams and fingerprints."""
+
+    def __init__(self, path, token, hashes, grams, finger_prints):
+        self.path, self.token = path, token
+        self.hashes, self.grams = hashes, grams
+        self.finger_prints = finger_prints
diff --git a/clonus/models.py b/clonus/models.py
index c0e0339..5fdd638 100644
--- a/clonus/models.py
+++ b/clonus/models.py
@@ -12,6 +12,7 @@ class Package(models.Model):
     id: int
     file1 = models.FilePathField(null=True)
     file2 = models.FilePathField(null=True)
+    file_template = models.FilePathField(null=True, default="РїРѕРєР° С‡С‚Рѕ РІСЂСѓС‡РЅСѓСЋ РІРїРёСЃС‹РІР°С‚СЊ")#attention!!!
     path = models.FilePathField()
     hash = models.CharField(max_length=32)
     gram_size = models.PositiveSmallIntegerField(default=8)
@@ -35,6 +36,7 @@ class Package(models.Model):
         rmtree(self.path)
 
 
+
 class MultiPackage(models.Model):
     id: int
     files = models.FilePathField(null=True)
diff --git a/clonus/serializers.py b/clonus/serializers.py
new file mode 100644
index 0000000..58870ed
--- /dev/null
+++ b/clonus/serializers.py
@@ -0,0 +1,9 @@
+from rest_framework import serializers
+from glob import glob
+from clonus.models import Package
+
+
+class PackageSerialiser(serializers.ModelSerializer):  # serialiser for Package limited to the comparison parameters
+    class Meta:
+        model = Package
+        fields = ("gram_size", "window_size")  # the only Package fields read from or written to API payloads
diff --git a/clonus/views.py b/clonus/views.py
index 6a1dbf8..083ef07 100644
--- a/clonus/views.py
+++ b/clonus/views.py
@@ -1,20 +1,82 @@
+from django.forms import model_to_dict
 from django.shortcuts import render, redirect
 from django.http import HttpRequest
 from pathlib import Path
 from collections import deque
 from itertools import combinations
 from more_itertools import ilen
+from rest_framework.response import Response
 
 from clonus.forms import FileToFileForm, ManyFilesForm
+from clonus.methods.client import get_files_for_compare
+from clonus.methods.counting_time import time_compare
 from clonus.models import Package, MultiPackage
 
 from clonus.methods.fp_method_builder import FingerprintMethodBuilder
 from clonus.methods.method_configurator import MethodConfigurator
+from rest_framework.views import APIView
+
+from clonus.serializers import PackageSerialiser
+
 
 # Create your views here.
+class SummaryAPIView(APIView):
+    """REST endpoint: GET lists stored packages, POST compares two uploads against a template."""
+
+    def get(self, request):
+        """Return the serialised gram_size/window_size of every Package under the key 'res'."""
+        return Response({'res': PackageSerialiser(Package.objects.all(), many=True).data})
+
+    def post(self, request):
+        """Save uploads file1, file2 and file_template, run the fingerprint method, return its result."""
+        dict_data = {'gram_size': request.data['gram_size'], 'window_size': request.data['window_size']}
+        serialiser = PackageSerialiser(data=dict_data)
+        serialiser.is_valid(raise_exception=True)
+
+        obj_package = serialiser.save()
+        obj_package.mkdir()
+
+        # Number the files by field name rather than by upload order, so the "[n]" prefix
+        # written to disk always matches the paths stored on the package below.
+        for i, field in enumerate(("file1", "file2", "file_template"), start=1):
+            handle_uploaded_file(request.FILES[field], obj_package.path, i)
+
+        obj_package.file1 = f'{obj_package.path}\\[1]{request.FILES["file1"].name}'
+        obj_package.file2 = f'{obj_package.path}\\[2]{request.FILES["file2"].name}'
+        obj_package.file_template = f'{obj_package.path}\\[3]{request.FILES["file_template"].name}'
+        obj_package.gen_hash()
+        obj_package.save()
+
+        filenames = [obj_package.file1, obj_package.file2, obj_package.file_template]
+        fp_builder = FingerprintMethodBuilder(filenames, obj_package.gram_size, obj_package.window_size)
+        config = MethodConfigurator(fp_builder)
+        method_res = config.make_method()
+        obj_package.coeff = method_res.clone_pct
+        obj_package.processed = True
+        obj_package.save()
+
+        context = {
+            "hash": obj_package.hash,
+            "coeff": obj_package.coeff * 100,
+            "file1": str(obj_package.file1),
+            "file2": str(obj_package.file2),
+            "file_template": str(obj_package.file_template),
+            "f1": Path(obj_package.file1).name,
+            "f2": Path(obj_package.file2).name,
+            "date_compare": obj_package.date,
+            "q1": method_res.clone_parts_1,
+            "q2": method_res.clone_parts_2,
+        }
+        return Response({"": context})
+
+def handle_uploaded_file(f, path, i):
+    """Write upload *f* chunk by chunk to the file ``<path>\\[<i>]<f.name>``."""
+    with open(f'{path}\\[{i}]{f.name}', "wb+") as out:
+        out.writelines(f.chunks())
 
 
 def index(request: HttpRequest):
+    time_compare()  # NOTE(review): runs the whole timing benchmark on every index request; looks like a debug leftover, confirm
     return render(request, "index.html")
 
 
@@ -78,7 +140,11 @@ def process_file(fil: Path, l: "list[list[int]]"):
     cur = offsets[0][0]
     end_of_q = False
     with open(fil, "r", encoding="utf-8") as f:
+        # print(res)
         for line in f:
+            if pos > l[len(l) - 1][1]:
+                contents += line
+                continue
             contents += line
             line_count += 1
             pos += len(line)
@@ -93,7 +159,6 @@ def process_file(fil: Path, l: "list[list[int]]"):
                     except IndexError:
                         end_of_q = True
                     res[-1][-1] = line_count
-        # print(res)
     return contents, res
 
 
@@ -103,7 +168,7 @@ def summary(request: HttpRequest, h: str):
     except Package.DoesNotExist:
         return redirect("index")
 
-    filenames = [p.file1, p.file2]
+    filenames = [p.file1, p.file2, p.file_template]
     fp_builder = FingerprintMethodBuilder(filenames, p.gram_size, p.window_size)
     config = MethodConfigurator(fp_builder)
     method_res = config.make_method()
@@ -121,13 +186,15 @@ def summary(request: HttpRequest, h: str):
         "file2": file2,
         "f1": Path(p.file1).name,
         "f2": Path(p.file2).name,
+        # "q1": method_res.clone_parts_1,
+        # "q2": method_res.clone_parts_2
         "q1": ", ".join(
-            "{start: " + str(i) + ", end: " + str(j) + ", color: 'yellow'}"
-            for i, j in q1
+           "{start: " + str(i) + ", end: " + str(j) + ", color: 'yellow'}"
+           for i, j in q1
         ),
         "q2": ", ".join(
-            "{start: " + str(i) + ", end: " + str(j) + ", color: 'yellow'}"
-            for i, j in q2
+          "{start: " + str(i) + ", end: " + str(j) + ", color: 'yellow'}"
+           for i, j in q2
         ),
     }
     return render(request, "summary.html", context)
diff --git a/requirements.txt b/requirements.txt
index 09fb36b..36d11d0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,6 @@ django-bootstrap-v5
 crispy-bootstrap5
 django-crispy-forms
 more-itertools
-Pygments
\ No newline at end of file
+djangorestframework
+Pygments
+requests
-- 
GitLab