30 lines
		
	
	
		
			1002 B
		
	
	
	
		
			Markdown
		
	
	
	
	
	
			
		
		
	
	
			30 lines
		
	
	
		
			1002 B
		
	
	
	
		
			Markdown
		
	
	
	
	
	
| # Repeated Entries
 | |
| 
 | |
| ```postgresql
 | |
| with timeline_items as (select jsonb_array_elements(json -> 'timelineItems') - 'samples' as entry
 | |
|                         from raw_files)
 | |
| select entry ->> 'itemId'    as item_id,
 | |
|        entry ->> 'lastSaved' as last_saved,
 | |
|        entry ->> 'endDate'   as end_date,
 | |
|        entry ->> 'startDate' as start_date
 | |
| from timeline_items
 | |
| WHERE entry ->> 'itemId' IN (SELECT entry ->> 'itemId'
 | |
|                              FROM timeline_items
 | |
|                              GROUP BY entry ->> 'itemId'
 | |
|                              HAVING COUNT(entry ->> 'itemId') > 1);
 | |
| ```
 | |
| 
 | |
| ```postgresql
 | |
| select md5(rfe.entry)
 | |
| from (select jsonb_array_elements(json -> 'timelineItems') - 'samples' as entry
 | |
|       from raw_files) as rfe
 | |
| where rfe.entry ->> 'itemId' = '01300b32-a3f7-4911-ab63-a0cbf0f312e0';
 | |
| ```
 | |
| 
 | |
| ```csv
 | |
| c7e2a142aabdcdddd00143da6612c235
 | |
| c7e2a142aabdcdddd00143da6612c235
 | |
| ```
 | |
| 
 | |
| running this query we see that the JSON content of the repeated entry is the same so it can be discounted.
 |