pour déploiement auto v2 via gitlab
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

479 lines
18 KiB

return: text
lang: plpgsql
parameters:
p0:
type: bigint
name: i_import_id
p1:
type: text
name: i_file
src: |
DECLARE
_file_code TEXT;
file TEXT;
result TEXT;
_i INT;
BEGIN
RAISE NOTICE '%' , 'Creation table de travail';
_file_code = upper((string_to_array(i_file,'.'))[array_upper(string_to_array(i_file,'.'),1)-1]);
IF NOT EXISTS (SELECT * FROM pg_class WHERE relnamespace = pg_my_temp_schema() AND relname = 'w_mat2a_data') THEN
DROP TABLE IF EXISTS w_mat2a_data;
DROP SEQUENCE IF EXISTS w_mat2a_data_seq;
CREATE TEMP SEQUENCE w_mat2a_data_seq INCREMENT BY 100;
CREATE TEMP TABLE w_mat2a_data (
data text,
sequence bigint DEFAULT nextval('w_mat2a_data_seq'::regclass),
table_sequence numeric default 0,
title_sequence numeric default 0,
header_sequence numeric default 0,
row_sequence numeric default 0,
line_type text,
columns text[]
);
CREATE INDEX i_w_mat2a_data
ON w_mat2a_data
USING btree
(sequence);
END IF;
TRUNCATE w_mat2a_data;
RAISE NOTICE '%' , 'Copie des donnees';
file = replace(i_file,chr(92),'/');
BEGIN
EXECUTE ('COPY w_mat2a_data (data)
FROM ''' || file || '''
WITH DELIMITER AS E''' || chr(92) || '01''');
EXCEPTION
WHEN others THEN RAISE NOTICE 'Erreur %' , 'Fichier ' || file || ' incompatible';
return 'KO. Fichier ' || file || ' incompatible';
END;
RAISE NOTICE '%' , 'Analyse';
RAISE NOTICE '%' , 'Analyse. Suppression des lignes inutiles';
-- <head>
DELETE FROM w_mat2a_data
USING
(
SELECT
MAX(CASE WHEN data ILIKE '<head>' THEN sequence ELSE NULL END) AS sequence_min,
MAX(CASE WHEN data ILIKE '</head>' THEN sequence ELSE NULL END) AS sequence_max
FROM w_mat2a_data
WHERE data ILIKE '<head>' OR data ILIKE '</head>'
) subview
WHERE sequence BETWEEN sequence_min AND sequence_max;
-- <colgroup>
PERFORM base.cti_execute(
'DELETE FROM w_mat2a_data
USING
(
SELECT
MAX(CASE WHEN data ILIKE ''<colgroup>'' THEN sequence ELSE NULL END) AS sequence_min,
MAX(CASE WHEN data ILIKE ''</colgroup>'' THEN sequence ELSE NULL END) AS sequence_max
FROM w_mat2a_data
WHERE data ILIKE ''<colgroup>'' OR data ILIKE ''</colgroup>''
) subview
WHERE sequence BETWEEN sequence_min AND sequence_max',10);
-- <colgroup> 2024
DELETE FROM w_mat2a_data
WHERE data ILIKE '<colgroup>%' AND data ilike '%</colgroup>'
;
-- <script>
DELETE FROM w_mat2a_data
USING
(
SELECT
MAX(CASE WHEN data ILIKE '<script%' THEN sequence ELSE NULL END) AS sequence_min,
MAX(CASE WHEN data ILIKE '</script>' THEN sequence ELSE NULL END) AS sequence_max
FROM w_mat2a_data
WHERE data ILIKE '<script%' OR data ILIKE '</script>'
) subview
WHERE sequence BETWEEN sequence_min AND sequence_max;
-- Conversion nouveau format 2024 comme ancien format
INSERT INTO w_mat2a_data(data, sequence)
SELECT
CASE
WHEN data LIKE '<div %' THEN '<table class="systitleandfootercontainer>'
WHEN data LIKE '<p><span %' THEN '<tr>'
WHEN data LIKE '</div%>' THEN '</table>'
ELSE data END AS data,
sequence+1 AS sequence
FROM w_mat2a_data
JOIN (
SELECT
CASE WHEN data LIKE '<div %' THEN sequence ELSE NULL END AS sequence_min,
LEAD(sequence) OVER (ORDER BY sequence) AS sequence_max
FROM w_mat2a_data
WHERE (data LIKE '<div %' AND data LIKE '%systitleandfootercontainer%' AND data LIKE '%id="IDX%' OR data LIKE '</div>%' )
) subview ON
sequence BETWEEN sequence_min AND sequence_max AND
(data LIKE '<div %' AND data LIKE '%systitleandfootercontainer%' AND data LIKE '%id="IDX%' OR data LIKE '</div>%' OR data LIKE '<p><span%' AND data LIKE '%systemtitle%')
UNION
SELECT
replace(replace(data,'<p><span','<td'),'</span>','</td>') AS data,
sequence+2 AS sequence
FROM w_mat2a_data
JOIN (
SELECT
CASE WHEN data LIKE '<div %' THEN sequence ELSE NULL END AS sequence_min,
LEAD(sequence) OVER (ORDER BY sequence) AS sequence_max
FROM w_mat2a_data
WHERE (data LIKE '<div %' AND data LIKE '%systitleandfootercontainer%' AND data LIKE '%id="IDX%' OR data LIKE '</div>%' )
) subview ON
sequence BETWEEN sequence_min AND sequence_max AND
data LIKE '<p><span%' AND data LIKE '%systemtitle%'
UNION
SELECT
'</tr>'::text AS data,
sequence+3 AS sequence
FROM w_mat2a_data
JOIN (
SELECT
CASE WHEN data LIKE '<div %' THEN sequence ELSE NULL END AS sequence_min,
LEAD(sequence) OVER (ORDER BY sequence) AS sequence_max
FROM w_mat2a_data
WHERE (data LIKE '<div %' AND data LIKE '%systitleandfootercontainer%' AND data LIKE '%id="IDX%' OR data LIKE '</div>%' )
) subview ON
sequence BETWEEN sequence_min AND sequence_max AND
data LIKE '<p><span%' AND data LIKE '%systemtitle%'
;
----------------- Fin conversion
UPDATE w_mat2a_data
SET data = replace(data,'<A ' || split_part(split_part(data,'<A ',2),'>',1) || '>','')
WHERE split_part(split_part(data,'<A ',2),'>',1) <> '';
UPDATE w_mat2a_data
SET data = replace(data,'<a href' || split_part(split_part(data,'<a href',2),'>',1) || '>','')
WHERE data LIKE '%<a href%';
UPDATE w_mat2a_data
SET data = replace(data,'</a>','')
WHERE data LIKE '%</a>%';
UPDATE w_mat2a_data SET data = replace(data,chr(160),' ') WHERE data LIKE '%' || chr(160) || '%';
UPDATE w_mat2a_data SET data = replace(data,'&nbsp;',' ') WHERE data LIKE '%&nbsp;%';
UPDATE w_mat2a_data SET data = replace(data,'&lt;','<') WHERE data LIKE '%&lt;%';
UPDATE w_mat2a_data SET data = replace(data,'&gt;','>') WHERE data LIKE '%&gt;%';
UPDATE w_mat2a_data SET data = replace(data,'&#39;','''') WHERE data LIKE '%&#39;%';
UPDATE w_mat2a_data SET data = regexp_replace(data,'<br/?>',' ', 'g') WHERE data LIKE '%<br%';
UPDATE w_mat2a_data SET data = replace(data,'<p>','') WHERE data LIKE '%<p>%';
UPDATE w_mat2a_data SET data = trim(data) WHERE data <> trim(data);
DELETE FROM w_mat2a_data WHERE trim(data) = '';
-- Eclatement des tag rassemblés sur une seule ligne
INSERT INTO w_mat2a_data(sequence, data)
SELECT sequence + 99, '</table>'
FROM w_mat2a_data
WHERE (data like '%<table %' OR data like '%<table>%') AND data like '%</table>%';
UPDATE w_mat2a_data
SET data = split_part(data,'</table>',1)
WHERE (data like '%<table %' OR data like '%<table>%') AND data like '%</table>%';
INSERT INTO w_mat2a_data(sequence, data)
SELECT sequence+1, '<tr>'||split_part(data,'<tr>',2)
FROM w_mat2a_data
WHERE data LIKE '%<table %' AND data like '%<tr>%' ;
UPDATE w_mat2a_data
SET data = split_part(data,'<tr>',1)
WHERE data LIKE '%<table %' AND data like '%<tr>%' ;
INSERT INTO w_mat2a_data(sequence, data)
SELECT sequence+1, trim('<td ' || split_part(split_part(data,'<td ',2),'</td>',1) || '</td>')
FROM w_mat2a_data
WHERE data LIKE '%<tr>%' AND data like '%</tr>%';
INSERT INTO w_mat2a_data(sequence, data)
SELECT sequence+2, '</tr>'
FROM w_mat2a_data
WHERE data LIKE '%<tr>%' AND data like '%</tr>%';
UPDATE w_mat2a_data
SET data = split_part(data,'<td ',1)
WHERE data LIKE '%<tr>%' AND data like '%</tr>%';
-- Pointage des tables
RAISE NOTICE '%' , 'Analyse. identification des tables';
PERFORM base.cti_execute(
'UPDATE w_mat2a_data
SET table_sequence = sequence_min
FROM
(
SELECT
MAX(CASE WHEN data LIKE ''<table %'' THEN sequence ELSE NULL END) AS sequence_min,
MAX(CASE WHEN data LIKE ''</table>%'' THEN sequence ELSE NULL END) AS sequence_max
FROM w_mat2a_data
WHERE table_sequence = 0 AND
(data LIKE ''<table %'' OR data LIKE ''</table>%'')
) subview
WHERE sequence BETWEEN sequence_min AND sequence_max',200);
-- Pointage des entetes de table
RAISE NOTICE '%' , 'Analyse. identification des entetes';
PERFORM base.cti_execute(
'UPDATE w_mat2a_data
SET header_sequence = sequence_min
FROM
(
SELECT
MAX(CASE WHEN data LIKE ''<thead>%'' THEN sequence ELSE NULL END) AS sequence_min,
MAX(CASE WHEN data LIKE ''</thead>%'' THEN sequence ELSE NULL END) AS sequence_max
FROM w_mat2a_data
WHERE table_sequence <> 0 AND
header_sequence = 0 AND
(data LIKE ''<thead>%'' OR data LIKE ''</thead>%'')
) subview
WHERE sequence BETWEEN sequence_min AND sequence_max',200);
-- Pointage des lignes
RAISE NOTICE '%' , 'Analyse. identification des lignes';
UPDATE w_mat2a_data
SET row_sequence = sequence_min
FROM
(
SELECT
w_mat2a_data_tr.sequence AS sequence_min,
MIN(w_mat2a_data.sequence) AS sequence_max
FROM w_mat2a_data w_mat2a_data_tr, w_mat2a_data
WHERE w_mat2a_data.sequence BETWEEN w_mat2a_data_tr.sequence AND w_mat2a_data_tr.sequence + 100000 AND
w_mat2a_data_tr.table_sequence <> 0 AND
w_mat2a_data_tr.header_sequence = 0 AND
w_mat2a_data_tr.row_sequence = 0 AND
w_mat2a_data_tr.data LIKE '<tr>%' AND
w_mat2a_data.table_sequence <> 0 AND
w_mat2a_data.header_sequence = 0 AND
w_mat2a_data.row_sequence = 0 AND
w_mat2a_data.data LIKE '</tr>%'
GROUP BY 1) subview
WHERE sequence BETWEEN sequence_min AND sequence_max;
RAISE NOTICE '%' , 'Analyse. identification des donnees';
-- Headers
UPDATE w_mat2a_data
SET columns = subview.table_columns
FROM (
SELECT
header_sequence,
base.cti_group_array3(split_part(split_part(data,'>',2),'<',1)) AS table_columns
FROM (SELECT header_sequence, data
FROM w_mat2a_data
WHERE header_sequence <> 0 AND
data LIKE '<th %'
ORDER BY sequence
) subview
GROUP BY 1
ORDER BY 1) subview
WHERE w_mat2a_data.sequence = subview.header_sequence;
-- Data
UPDATE w_mat2a_data
SET columns = subview.columns
FROM (
SELECT
row_sequence,
base.cti_group_array3(trim(split_part(split_part(data,'>',2),'<',1))) AS columns
FROM (SELECT row_sequence, data
FROM w_mat2a_data
WHERE row_sequence <> 0 AND
(data LIKE '<td %' OR data LIKE '<th %')
ORDER BY sequence
) subview
GROUP BY 1
ORDER BY 1) subview
WHERE w_mat2a_data.sequence = subview.row_sequence;
UPDATE w_mat2a_data
SET columns = subview.columns
FROM (
SELECT
table_sequence,
base.cti_group_array3(entete) AS columns
FROM (
SELECT table_sequence, trim(columns[1]) AS entete
FROM w_mat2a_data
WHERE data LIKE '<tr>%' AND
trim(columns[1]) <> '''' AND
columns[1] NOT ILIKE '%Finess=%' AND
table_sequence IN
(
SELECT table_sequence from w_mat2a_data
WHERE data LIKE '<table%' AND data NOT ILIKE '%class="Table%'
)
ORDER BY sequence
) subview
GROUP BY 1
ORDER BY 1
) subview
WHERE sequence = subview.table_sequence;
UPDATE w_mat2a_data
SET title_sequence =
(SELECT MAX(table_sequence)
FROM w_mat2a_data w_mat2a_data_title
WHERE w_mat2a_data_title.data LIKE '<table%' AND
w_mat2a_data_title.data NOT ILIKE '%class="Table%' AND
(w_mat2a_data_title.columns[2] IS NOT NULL OR _file_code = 'T1V4SYNTS') AND
w_mat2a_data_title.table_sequence < w_mat2a_data.table_sequence)
WHERE w_mat2a_data.data LIKE '<table%' AND
w_mat2a_data.data ILIKE '%class="Table%';
UPDATE w_mat2a_data
SET columns = subview.columns
FROM (
SELECT table_sequence, columns
FROM w_mat2a_data
WHERE data LIKE '<table%' AND table_sequence <> 0
ORDER BY sequence
) subview
WHERE title_sequence = subview.table_sequence;
-- Type des lignes
UPDATE w_mat2a_data
SET line_type = CASE
WHEN data like '<table %' THEN 'T'
WHEN data like '<thead>%' THEN 'H'
WHEN sequence IN (SELECT row_sequence FROM w_mat2a_data WHERE sequence <> 0 AND data LIKE '<th %') THEN 'F'
ELSE 'D' END
WHERE columns IS NOT NULL AND
table_sequence NOT IN (SELECT table_sequence FROM w_mat2a_data WHERE data LIKE '<table%' AND data NOT ILIKE '%class="Table%');
-- Report des titres pour tables sans titre
UPDATE w_mat2a_data SET
columns[1] = CASE WHEN w_mat2a_data.columns[1] <> '' THEN w_mat2a_data.columns[1] ELSE w_mat2a_data_OK.columns[1] END,
columns[2] = CASE WHEN w_mat2a_data.columns[2] <> '' THEN w_mat2a_data.columns[2] ELSE w_mat2a_data_OK.columns[2] END,
columns[3] = CASE WHEN w_mat2a_data.columns[3] <> '' THEN w_mat2a_data.columns[3] ELSE w_mat2a_data_OK.columns[3] END,
columns[4] = CASE WHEN w_mat2a_data.columns[4] <> '' THEN w_mat2a_data.columns[4] ELSE w_mat2a_data_OK.columns[4] END
FROM w_mat2a_data w_mat2a_data_ok
JOIN (
SELECT w_mat2a_data.sequence, MAX(w_mat2a_data_OK.sequence) AS sequence_OK
FROM w_mat2a_data
JOIN w_mat2a_data w_mat2a_data_OK ON w_mat2a_data_OK.line_type = 'T' AND
w_mat2a_data_OK.sequence < w_mat2a_data.sequence AND
(w_mat2a_data_OK.columns[1] <> '''' AND w_mat2a_data_OK.columns[1] IS NOT NULL AND w_mat2a_data_OK.columns[2] IS NOT NULL)
WHERE w_mat2a_data.line_type = 'T' AND
(w_mat2a_data.columns[1] = '''' OR w_mat2a_data.columns[1] = '' OR w_mat2a_data.columns[1] IS NULL OR w_mat2a_data.columns[2] IS NULL)
GROUP BY 1
) subview ON subview.sequence_OK = w_mat2a_data_ok.sequence
WHERE w_mat2a_data.sequence = subview.sequence;
UPDATE w_mat2a_data
SET columns = array_append(w_mat2a_data_OK.columns,w_mat2a_data.columns[1])
FROM w_mat2a_data w_mat2a_data_ok
JOIN (
SELECT w_mat2a_data.sequence, MAX(w_mat2a_data_OK.sequence) AS sequence_OK
FROM w_mat2a_data
JOIN w_mat2a_data w_mat2a_data_OK ON w_mat2a_data_OK.line_type = 'T' AND
w_mat2a_data_OK.sequence < w_mat2a_data.sequence AND
(w_mat2a_data_OK.columns[1] <> '''' AND w_mat2a_data_OK.columns[1] IS NOT NULL AND w_mat2a_data_OK.columns[2] IS NOT NULL)
WHERE w_mat2a_data.line_type = 'T' AND
(w_mat2a_data.columns[1] = '''' OR w_mat2a_data.columns[1] IS NULL)
GROUP BY 1
) subview ON subview.sequence_OK = w_mat2a_data_ok.sequence
WHERE w_mat2a_data.sequence = subview.sequence;
-- Sous titre
UPDATE w_mat2a_data
SET columns = array_append(w_mat2a_data.columns,w_mat2a_data_OK.columns[1])
FROM w_mat2a_data w_mat2a_data_ok
JOIN (
SELECT w_mat2a_data.sequence, MAX(w_mat2a_data_OK.sequence) AS sequence_OK
FROM w_mat2a_data
JOIN w_mat2a_data w_mat2a_data_OK ON w_mat2a_data_OK.data ILIKE '%<table %' AND
w_mat2a_data_OK.sequence < w_mat2a_data.sequence AND
w_mat2a_data_OK.columns[1] IS NOT NULL AND w_mat2a_data_OK.columns[1] NOT LIKE '''%' AND w_mat2a_data_OK.columns[2] IS NULL
WHERE w_mat2a_data.line_type = 'T'
GROUP BY 1
ORDER BY 1
) subview ON subview.sequence_OK = w_mat2a_data_ok.sequence
WHERE w_mat2a_data.sequence = subview.sequence AND
w_mat2a_data_OK.columns[1] IS DISTINCT FROM w_mat2a_data.columns[1];
UPDATE w_mat2a_data
SET columns = array_append(columns,sous_texte)
FROM (
SELECT w_mat2a_data.sequence, trim(split_part(split_part(w_mat2a_data.data,'>',2),'<',1)) AS sous_texte, MIN(w_mat2a_data_T.sequence) AS table_sequence
FROM w_mat2a_data
JOIN w_mat2a_data w_mat2a_data_T ON w_mat2a_data_T.line_type = 'T' AND w_mat2a_data_T.sequence > w_mat2a_data.sequence
WHERE w_mat2a_data.data ILIKE '%<div class="c m Byline">%'
GROUP BY 1,2
) subview
WHERE w_mat2a_data.sequence = subview.table_sequence;
RAISE NOTICE '%' , 'Analyse. Renumerotation des sequences';
-- Sequence de table à partir de 1
UPDATE w_mat2a_data
SET table_sequence = i
FROM (
SELECT i, tables_array[i] As table_sequence
FROM (
SELECT tables_array, generate_series(array_lower(tables_array,1),array_upper(tables_array,1)) AS i
FROM (
SELECT base.cti_group_array3(table_sequence) AS tables_array
FROM (
SELECT table_sequence
FROM w_mat2a_data
WHERE line_type = 'T'
ORDER BY sequence
) subview
) subview
) subview
) subview
WHERE w_mat2a_data.table_sequence = subview.table_sequence;
-- Sequence de ligne à partir de 1 par table
UPDATE w_mat2a_data
SET row_sequence = i
FROM (
SELECT i, rows_array[i] As row_sequence
FROM (
SELECT rows_array, generate_series(array_lower(rows_array,1),array_upper(rows_array,1)) AS i
FROM (
SELECT table_sequence, base.cti_group_array3(row_sequence) AS rows_array
FROM (
SELECT table_sequence, row_sequence
FROM w_mat2a_data
WHERE line_type IN ('D','F')
ORDER BY sequence
) subview
GROUP BY 1
) subview
) subview
) subview
WHERE w_mat2a_data.row_sequence = subview.row_sequence;
RAISE NOTICE '%' , 'Conversion numerique';
FOR _i IN SELECT generate_series(1,(select max(array_upper(columns,1)) from w_mat2a_data)) LOOP
UPDATE w_mat2a_data
SET columns[_i] =
trim(
CASE WHEN base.cti_is_num(columns[_i])
THEN replace(replace(columns[_i],' ',''),',','.')
ELSE columns[_i]
END
)
WHERE columns[_i] <>
trim(
CASE WHEN base.cti_is_num(columns[_i])
THEN replace(replace(columns[_i],' ',''),',','.')
ELSE columns[_i]
END
);
END LOOP;
RAISE NOTICE '%' , 'Ajout à la table p_mat2a_data';
DELETE FROM pmsi.p_mat2a_data
WHERE mat2a_import_id = i_import_id AND
file_code = _file_code;
INSERT INTO pmsi.p_mat2a_data (mat2a_import_id, file_code, table_sequence, row_sequence, line_type, data_mat2a)
SELECT i_import_id,
_file_code,
table_sequence,
row_sequence,
line_type,
columns
FROM w_mat2a_data
WHERE columns IS NOT NULL AND
table_sequence NOT IN (SELECT table_sequence FROM w_mat2a_data WHERE data LIKE '<table%' AND data NOT ILIKE '%class="Table%')
ORDER BY sequence;
RETURN 'OK';
END;